forked from 0xWheatyz/SPARC
test(analyzer,serp): add tests for caching, single query, and parallel processing
- Add TestSingleQueryBugFix: verify SERP.query is called once per analysis
- Add TestPatentCaching: DB cache hit/miss, SERP query cache hit/miss
- Add TestDynamicDateRange: rolling window, days_back param
- Add TestFilesystemPDFCaching: skip download, redownload empty files
- Add autouse mock_db fixture to prevent real DB connections in all tests
This commit is contained in:
@@ -1,7 +1,11 @@
|
||||
"""Tests for SERP API patent retrieval and parsing functionality."""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from unittest.mock import patch, Mock
|
||||
from datetime import datetime, timedelta
|
||||
from SPARC.serp_api import SERP
|
||||
from SPARC.types import Patent
|
||||
|
||||
|
||||
class TestTextCleaning:
|
||||
@@ -176,3 +180,89 @@ class TestPatentMinimization:
|
||||
|
||||
# Sections should be separated by double newlines
|
||||
assert "\n\n" in result
|
||||
|
||||
|
||||
class TestDynamicDateRange:
    """Test dynamic date range computation in SERP.query.

    Each test replaces ``SPARC.serp_api.serpapi.search`` with a mock and then
    inspects the ``tbs`` entry of the parameter dict that was passed to it as
    the first positional argument.
    """

    def test_query_uses_rolling_date_window(self, mocker):
        """Verify the date filter uses a rolling window, not hardcoded dates."""
        mock_search = mocker.patch("SPARC.serp_api.serpapi.search")
        mock_search.return_value = {"organic_results": []}
        mocker.patch("SPARC.serp_api.config.api_key", "fake-key")
        mocker.patch("SPARC.serp_api.config.patent_search_days", 90)

        SERP.query("TestCorp")

        call_params = mock_search.call_args[0][0]
        tbs = call_params["tbs"]
        # Should contain "cdr:1,cd_min:" with a date, not the old hardcoded one
        assert "cdr:1,cd_min:" in tbs
        assert "10/28/2025" not in tbs  # old hardcoded date gone

    def test_query_respects_days_back_param(self, mocker):
        """Verify days_back parameter controls the date window."""
        mock_search = mocker.patch("SPARC.serp_api.serpapi.search")
        mock_search.return_value = {"organic_results": []}
        mocker.patch("SPARC.serp_api.config.api_key", "fake-key")
        mocker.patch("SPARC.serp_api.config.patent_search_days", 90)

        # Sample the clock on both sides of the call: if the date rolls over
        # while the query runs, the start date may be derived from either day.
        before = datetime.now()
        SERP.query("TestCorp", days_back=30)
        after = datetime.now()

        call_params = mock_search.call_args[0][0]
        tbs = call_params["tbs"]

        # Build the unpadded M/D/YYYY string by hand. The "%-m" / "%-d"
        # strftime flags are a platform extension and are not supported
        # everywhere (e.g. Windows), which made this test non-portable.
        expected_starts = set()
        for moment in (before, after):
            start = moment - timedelta(days=30)
            expected_starts.add(f"{start.month}/{start.day}/{start.year}")

        assert any(expected in tbs for expected in expected_starts), (
            f"expected one of {sorted(expected_starts)} in tbs={tbs!r}"
        )
|
||||
|
||||
|
||||
class TestFilesystemPDFCaching:
    """Test that save_patents skips download for existing files.

    ``requests.get``, ``os.makedirs`` and the ``os.path`` checks are patched
    through ``SPARC.serp_api``; the tests that expect a download also patch
    ``builtins.open``. Each scenario is therefore selected purely by the
    mocked return values of ``os.path.exists`` / ``os.path.getsize``.
    """

    def test_save_patents_skips_download_when_cached(self, mocker):
        """Already-downloaded PDFs should not be re-downloaded."""
        mock_get = mocker.patch("SPARC.serp_api.requests.get")
        mocker.patch("SPARC.serp_api.os.makedirs")

        # Simulate an existing, non-empty file via the mocked path checks.
        # No real file is created: the asserted path is "patents/US123.pdf"
        # and both checks below are answered by the mocks.
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=True)
        mocker.patch("SPARC.serp_api.os.path.getsize", return_value=100)

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        result = SERP.save_patents(patent)

        mock_get.assert_not_called()
        assert result.pdf_path == "patents/US123.pdf"

    def test_save_patents_downloads_when_not_cached(self, mocker):
        """Missing PDFs should be downloaded."""
        mock_response = Mock()
        mock_response.content = b"%PDF-1.4 content"
        mock_get = mocker.patch("SPARC.serp_api.requests.get", return_value=mock_response)
        mocker.patch("SPARC.serp_api.os.makedirs")
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=False)
        # Replace open() so any file write goes to a mock handle.
        mocker.patch("builtins.open", mocker.mock_open())

        patent = Patent(patent_id="US456", pdf_link="http://example.com/test.pdf")
        result = SERP.save_patents(patent)

        mock_get.assert_called_once_with("http://example.com/test.pdf")
        assert result.pdf_path == "patents/US456.pdf"

    def test_save_patents_redownloads_empty_files(self, mocker):
        """Empty/corrupt PDFs (0 bytes) should be re-downloaded."""
        mock_response = Mock()
        mock_response.content = b"%PDF-1.4 content"
        mock_get = mocker.patch("SPARC.serp_api.requests.get", return_value=mock_response)
        mocker.patch("SPARC.serp_api.os.makedirs")
        # The file "exists" but reports a size of zero bytes.
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=True)
        mocker.patch("SPARC.serp_api.os.path.getsize", return_value=0)
        # Replace open() so any file write goes to a mock handle.
        mocker.patch("builtins.open", mocker.mock_open())

        patent = Patent(patent_id="US789", pdf_link="http://example.com/test.pdf")
        result = SERP.save_patents(patent)

        mock_get.assert_called_once()
        assert result.pdf_path == "patents/US789.pdf"
|
||||
|
||||
Reference in New Issue
Block a user