Refactor scheduler.py to use the application-level pooled DatabaseClient

Replace the per-invocation DatabaseClient creation in run_scheduled_analysis() with the shared pooled client from SPARC.auth.get_db_client(). This avoids creating a new database connection on every scheduler tick, which could exhaust the connection pool under load.

Key changes:
- Import get_db_client from SPARC.auth instead of DatabaseClient
- Remove manual connect/initialize_schema/close calls
- Remove unused SPARC.config import

Closes leeworks-agents/SPARC#1658

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 19:16:54 +00:00
2 changed files with 12 additions and 218 deletions
@@ -2,14 +2,17 @@

 Uses APScheduler to periodically re-analyze tracked companies and
 detect significant changes in patent counts.
+
+The scheduler reuses the application-level pooled DatabaseClient
+(from ``SPARC.auth``) instead of creating its own connection, which
+avoids exhausting the database connection pool under load.
 """

 import logging
 import os

-from SPARC import config
 from SPARC.analyzer import CompanyAnalyzer
-from SPARC.database import DatabaseClient
+from SPARC.auth import get_db_client

 logger = logging.getLogger(__name__)

@@ -21,10 +24,13 @@ CHANGE_THRESHOLD_PERCENT = int(os.getenv("CHANGE_THRESHOLD_PERCENT", "20"))


 def run_scheduled_analysis() -> None:
-    """Re-analyze all tracked companies and check for significant changes."""
-    db = DatabaseClient(config.database_url)
-    db.connect()
-    db.initialize_schema()
+    """Re-analyze all tracked companies and check for significant changes.
+
+    Uses the shared pooled DatabaseClient from ``SPARC.auth.get_db_client()``
+    rather than creating a disposable connection, so the scheduler participates
+    in the same connection pool as the rest of the application.
+    """
+    db = get_db_client()

    tracked = db.list_tracked_companies()
    if not tracked:
@@ -74,7 +80,6 @@ def run_scheduled_analysis() -> None:
        except Exception as e:
            logger.error("Error analyzing tracked company %s: %s", name, e)

-    db.close()
    logger.info("Scheduled analysis complete")


@@ -1,211 +0,0 @@
-"""Tests for analyze_single_patent auto-download path.
-
-Covers issue #1661:
- PDF exists on disk: direct analysis (happy path)
- PDF not on disk, cached link exists: auto-download and analyze
- PDF not on disk, no cached link: FileNotFoundError
- Analysis failure after PDF found: graceful error message
- Model override parameter passthrough
-"""
-
-import os
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from SPARC.analyzer import CompanyAnalyzer
-from SPARC.types import Patent
-
-
-@pytest.fixture(autouse=True)
-def mock_db(mocker):
-    """Mock DatabaseClient so no real DB is needed."""
-    mock_db_cls = mocker.patch("SPARC.analyzer.DatabaseClient")
-    mock_db_instance = MagicMock()
-    mock_db_instance.get_cached_patent.return_value = None
-    mock_db_instance.get_cached_serp_query.return_value = None
-    mock_db_cls.return_value = mock_db_instance
-    return mock_db_instance
-
-
-@pytest.fixture
-def analyzer(mocker, mock_db):
-    """Create a CompanyAnalyzer with mocked LLM and DB."""
-    mocker.patch("SPARC.analyzer.LLMAnalyzer")
-    return CompanyAnalyzer(openrouter_api_key="test-key")
-
-
-class TestAnalyzeSinglePatentAutoDownload:
-    """Test the auto-download logic in analyze_single_patent."""
-
-    def test_pdf_on_disk_analyzed_directly(self, analyzer, mocker, tmp_path):
-        """When PDF exists on disk, it is analyzed directly without download."""
-        patent_id = "US-11234567-B2"
-
-        # Create the patents dir and PDF file
-        patents_dir = tmp_path / "patents"
-        patents_dir.mkdir()
-        pdf_path = patents_dir / f"{patent_id}.pdf"
-        pdf_path.write_bytes(b"fake PDF content")
-
-        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
-        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
-        mock_parse.return_value = {"abstract": "test", "claims": "test claims"}
-        mock_minimize.return_value = "minimized content"
-        analyzer.llm_analyzer.analyze_patent_content.return_value = "Good patent."
-
-        # Change cwd so patents/{patent_id}.pdf resolves to our tmp_path
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            result = analyzer.analyze_single_patent(patent_id, "TestCo")
-        finally:
-            os.chdir(original_cwd)
-
-        assert result == "Good patent."
-        # DB cache should not have been queried since file existed
-        analyzer.db.get_cached_patent.assert_not_called()
-
-    def test_auto_download_from_cached_link(self, analyzer, mocker, tmp_path):
-        """When PDF is not on disk but link is cached, auto-download occurs."""
-        patent_id = "US-99887766-A1"
-
-        # No patents dir exists (PDF not on disk)
-        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
-        downloaded_patent = Patent(patent_id=patent_id, pdf_link="https://example.com/patent.pdf")
-        downloaded_patent.pdf_path = f"patents/{patent_id}.pdf"
-        mock_save.return_value = downloaded_patent
-
-        # Cached patent has a PDF link
-        analyzer.db.get_cached_patent.return_value = {
-            "patent_id": patent_id,
-            "pdf_link": "https://example.com/patent.pdf",
-        }
-
-        # Mock the rest of the analysis pipeline
-        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
-        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
-        mock_parse.return_value = {"abstract": "test abstract"}
-        mock_minimize.return_value = "minimized content"
-        analyzer.llm_analyzer.analyze_patent_content.return_value = "Strong innovation."
-
-        # Change cwd so patents/{patent_id}.pdf does NOT exist
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            result = analyzer.analyze_single_patent(patent_id, "DownloadCo")
-        finally:
-            os.chdir(original_cwd)
-
-        assert result == "Strong innovation."
-        analyzer.db.get_cached_patent.assert_called_once_with(patent_id)
-        mock_save.assert_called_once()
-        # Verify the Patent passed to save_patents has the correct ID and link
-        saved_patent = mock_save.call_args[0][0]
-        assert saved_patent.patent_id == patent_id
-        assert saved_patent.pdf_link == "https://example.com/patent.pdf"
-
-    def test_no_cached_link_raises_file_not_found(self, analyzer, mocker, tmp_path):
-        """When PDF is not on disk and no cached link, FileNotFoundError raised."""
-        patent_id = "US-00000000-X1"
-
-        analyzer.db.get_cached_patent.return_value = None
-
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            with pytest.raises(FileNotFoundError, match="no download link is cached"):
-                analyzer.analyze_single_patent(patent_id, "MissingCo")
-        finally:
-            os.chdir(original_cwd)
-
-    def test_cached_patent_without_pdf_link_raises(self, analyzer, mocker, tmp_path):
-        """When cached patent exists but has no pdf_link, FileNotFoundError raised."""
-        patent_id = "US-11111111-B1"
-
-        analyzer.db.get_cached_patent.return_value = {
-            "patent_id": patent_id,
-            "pdf_link": None,
-        }
-
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            with pytest.raises(FileNotFoundError, match="no download link is cached"):
-                analyzer.analyze_single_patent(patent_id, "NoPDFCo")
-        finally:
-            os.chdir(original_cwd)
-
-    def test_analysis_exception_returns_error_message(self, analyzer, mocker, tmp_path):
-        """When analysis pipeline fails, returns error string instead of raising."""
-        patent_id = "US-22222222-A2"
-
-        # Create the PDF on disk so it skips download
-        patents_dir = tmp_path / "patents"
-        patents_dir.mkdir()
-        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")
-
-        # Parse fails
-        mocker.patch(
-            "SPARC.analyzer.SERP.parse_patent_pdf",
-            side_effect=ValueError("Corrupt PDF"),
-        )
-
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            result = analyzer.analyze_single_patent(patent_id, "ErrorCo")
-        finally:
-            os.chdir(original_cwd)
-
-        assert "Failed to analyze patent" in result
-        assert "Corrupt PDF" in result
-
-    def test_model_override_passed_to_llm(self, analyzer, mocker, tmp_path):
-        """The model parameter is forwarded to the LLM analyzer."""
-        patent_id = "US-33333333-B2"
-
-        patents_dir = tmp_path / "patents"
-        patents_dir.mkdir()
-        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")
-
-        mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "test"})
-        mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="content")
-        analyzer.llm_analyzer.analyze_patent_content.return_value = "Analysis result."
-
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            result = analyzer.analyze_single_patent(
-                patent_id, "ModelCo", model="openai/gpt-4o"
-            )
-        finally:
-            os.chdir(original_cwd)
-
-        assert result == "Analysis result."
-        analyzer.llm_analyzer.analyze_patent_content.assert_called_once_with(
-            patent_content="content",
-            company_name="ModelCo",
-            model="openai/gpt-4o",
-        )
-
-    def test_file_not_found_during_parse_re_raised(self, analyzer, mocker, tmp_path):
-        """FileNotFoundError during parsing is re-raised, not caught."""
-        patent_id = "US-44444444-C1"
-
-        patents_dir = tmp_path / "patents"
-        patents_dir.mkdir()
-        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")
-
-        mocker.patch(
-            "SPARC.analyzer.SERP.parse_patent_pdf",
-            side_effect=FileNotFoundError("PDF file vanished"),
-        )
-
-        original_cwd = os.getcwd()
-        os.chdir(tmp_path)
-        try:
-            with pytest.raises(FileNotFoundError, match="PDF file vanished"):
-                analyzer.analyze_single_patent(patent_id, "VanishCo")
-        finally:
-            os.chdir(original_cwd)