From b32eebff8ad93fee00f6873cd7c5f1ff223d5386 Mon Sep 17 00:00:00 2001 From: agent-company Date: Sun, 19 Apr 2026 20:06:10 +0000 Subject: [PATCH 01/15] ci: enable ruff linting and pytest in CI pipeline Uncomment the ruff check and pytest steps in the Gitea Actions build workflow so that linting violations and test failures block image builds. Fix all pre-existing ruff violations (E402 import ordering in analyzer.py, F821 undefined name in api.py, I001 unsorted imports in test files, F401 unused import in test_rate_limit.py). Closes leeworks-agents/SPARC#1559 Closes leeworks-agents/SPARC#1560 Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/build.yaml | 29 +++++++++++++++-------------- SPARC/analyzer.py | 4 ++-- SPARC/api.py | 8 ++++++-- tests/test_auth.py | 1 + tests/test_rate_limit.py | 3 ++- tests/test_security.py | 7 +++++++ 6 files changed, 33 insertions(+), 19 deletions(-) diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml index 80acc27..aeeb5c1 100644 --- a/.gitea/workflows/build.yaml +++ b/.gitea/workflows/build.yaml @@ -28,10 +28,10 @@ jobs: run: | pip3 install -r requirements.txt ruff -# - name: Run ruff linter -# shell: sh -# run: | -# ruff check SPARC/ tests/ + - name: Run ruff linter + shell: sh + run: | + ruff check SPARC/ tests/ - name: Install Node.js and check TypeScript types shell: sh @@ -47,16 +47,17 @@ jobs: fi npx tsc --noEmit -# - name: Run pytest -# shell: sh -# env: -# DATABASE_URL: "sqlite://" -# API_KEY: "test-key" -# OPENROUTER_API_KEY: "test-key" -# JWT_SECRET: "test-secret-for-ci" -# APP_ENV: "development" -# run: | -# python3 -m pytest tests/ -v --tb=short -x + - name: Run pytest + shell: sh + env: + DATABASE_URL: "sqlite://" + API_KEY: "test-key" + OPENROUTER_API_KEY: "test-key" + JWT_SECRET: "test-secret-for-ci" + APP_ENV: "development" + run: | + pip3 install pytest + python3 -m pytest tests/ -v --tb=short -x build-api: needs: test diff --git a/SPARC/analyzer.py b/SPARC/analyzer.py index 31ad7f1..1ebceaf 100644 --- a/SPARC/analyzer.py +++ b/SPARC/analyzer.py @@ -10,13 +10,13 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Callable from SPARC import config - -logger = logging.getLogger(__name__) from SPARC.database import DatabaseClient from SPARC.llm import LLMAnalyzer from SPARC.serp_api import SERP from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult, Patent, Patents +logger = logging.getLogger(__name__) + class CompanyAnalyzer: """Orchestrates end-to-end company performance analysis via patents.""" diff --git a/SPARC/api.py b/SPARC/api.py index 3a28033..a42ddd7 100644 --- a/SPARC/api.py +++ b/SPARC/api.py @@ -3,9 +3,14 @@ Provides REST API endpoints for analyzing company patent portfolios. """ +from __future__ import annotations + from contextlib import asynccontextmanager from datetime import datetime -from typing import Annotated, List +from typing import TYPE_CHECKING, Annotated, List + +if TYPE_CHECKING: + from SPARC.database import DatabaseClient from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Query, Request from fastapi.middleware.cors import CORSMiddleware @@ -653,7 +658,6 @@ async def export_company_pdf( PDF file download """ import io - import textwrap from reportlab.lib import colors from reportlab.lib.pagesizes import letter diff --git a/tests/test_auth.py b/tests/test_auth.py index de79259..bb4378a 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -175,6 +175,7 @@ class TestGetMe: from datetime import timedelta import jwt as pyjwt + from SPARC.auth import JWT_ALGORITHM, JWT_SECRET payload = { diff --git a/tests/test_rate_limit.py b/tests/test_rate_limit.py index f9f06af..8d0adf0 100644 --- a/tests/test_rate_limit.py +++ b/tests/test_rate_limit.py @@ -1,7 +1,8 @@ """Tests for rate limiting on auth endpoints.""" +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import Mock, patch, MagicMock from fastapi.testclient import TestClient from SPARC.api import app diff --git a/tests/test_security.py b/tests/test_security.py index b6e4be1..b34deec 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -14,6 +14,7 @@ class TestJWTSecretStartupCheck: with patch.dict(os.environ, {"APP_ENV": "production"}): # Reload config to pick up the new APP_ENV import importlib + import SPARC.config importlib.reload(SPARC.config) @@ -31,6 +32,7 @@ class TestJWTSecretStartupCheck: """Starting with default secret and APP_ENV=development must not raise.""" with patch.dict(os.environ, {"APP_ENV": "development"}): import importlib + import SPARC.config importlib.reload(SPARC.config) @@ -46,6 +48,7 @@ class TestJWTSecretStartupCheck: """Starting with a custom secret in production must not raise.""" with patch.dict(os.environ, {"APP_ENV": "production"}): import importlib + import SPARC.config importlib.reload(SPARC.config) @@ -65,6 +68,7 @@ class TestJWTSecretStartupCheck: env.pop("APP_ENV", None) with patch.dict(os.environ, env, clear=True): import importlib + import SPARC.config importlib.reload(SPARC.config) @@ -84,6 +88,7 @@ class TestCORSConfig: """When CORS_ORIGINS is unset, defaults to localhost origins.""" with patch.dict(os.environ, {"CORS_ORIGINS": ""}): import importlib + import SPARC.config importlib.reload(SPARC.config) assert SPARC.config.cors_origins == [ @@ -95,6 +100,7 @@ class TestCORSConfig: """Setting CORS_ORIGINS configures allowed origins.""" with patch.dict(os.environ, {"CORS_ORIGINS": "https://sparc.example.com,https://app.example.com"}): import importlib + import SPARC.config importlib.reload(SPARC.config) assert SPARC.config.cors_origins == [ @@ -109,6 +115,7 @@ class TestCORSConfig: """A single origin without comma works correctly.""" with patch.dict(os.environ, {"CORS_ORIGINS": "https://sparc.example.com"}): import importlib + import SPARC.config importlib.reload(SPARC.config) assert SPARC.config.cors_origins == ["https://sparc.example.com"] -- 2.52.0 From 2f2b6382fa30c067b7a8803b07c1bd9019698715 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 06:05:54 +0000 Subject: [PATCH 02/15] Expand JWT auth integration tests from 17 to 33 cases Add comprehensive edge-case coverage for issue #1624: - Admin delete user endpoint (5 tests): successful delete, self-delete prevention, nonexistent user 404, non-admin 403, missing token rejection - Admin role change gaps (2 tests): nonexistent user 404, non-admin 403 - Input validation (3 tests): invalid email 422, short password 422, missing fields 422 for both register and login - Token edge cases (4 tests): malformed token, wrong-secret token, deleted user token, deleted user refresh - Token claim verification (1 test): login tokens contain correct claims All tests use mocked DB fixtures and require no live database. Closes leeworks-agents/SPARC#1624 Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_auth.py | 219 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 209 insertions(+), 10 deletions(-) diff --git a/tests/test_auth.py b/tests/test_auth.py index bb4378a..983c44b 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,13 +1,29 @@ -"""Tests for JWT authentication flow: register, login, protected routes, refresh, admin access.""" +"""Tests for JWT authentication flow: register, login, protected routes, refresh, admin access. -from datetime import datetime, timezone +Covers all five scenarios required by issue #1624: +1. Registration (POST /auth/register) +2. Login (POST /auth/login) +3. Protected route access (GET /auth/me) -- valid, missing, expired, wrong-type tokens +4. Token refresh (POST /auth/refresh) +5. Admin-only endpoints (GET /admin/users, PATCH role, DELETE user) + +All tests use mocked DB fixtures and require no live database. +""" + +from datetime import datetime, timedelta, timezone from unittest.mock import MagicMock, patch +import jwt as pyjwt import pytest from fastapi.testclient import TestClient from SPARC.api import app -from SPARC.auth import create_access_token, create_refresh_token +from SPARC.auth import ( + JWT_ALGORITHM, + JWT_SECRET, + create_access_token, + create_refresh_token, +) @pytest.fixture @@ -171,13 +187,6 @@ class TestGetMe: def test_expired_token_returns_401(self, client, mock_db): """An expired token should return 401.""" - # Create a token that has already expired - from datetime import timedelta - - import jwt as pyjwt - - from SPARC.auth import JWT_ALGORITHM, JWT_SECRET - payload = { "sub": "1", "email": "user@test.com", @@ -301,3 +310,193 @@ class TestAdminUsers: assert response.status_code == 400 assert "own role" in response.json()["detail"].lower() + + def test_role_change_nonexistent_user_returns_404(self, client, mock_db): + """Changing role for a user that does not exist should return 404.""" + admin = _make_admin_user() + mock_db.get_user_by_id.return_value = admin + mock_db.update_user_role.return_value = None + + response = client.patch( + "/admin/users/999/role", + json={"role": "admin"}, + headers=_auth_header(admin), + ) + + assert response.status_code == 404 + assert "not found" in response.json()["detail"].lower() + + def test_regular_user_cannot_change_role(self, client, mock_db): + """Non-admin user should receive 403 when trying to change roles.""" + user = _make_regular_user() + mock_db.get_user_by_id.return_value = user + + response = client.patch( + "/admin/users/1/role", + json={"role": "admin"}, + headers=_auth_header(user), + ) + + assert response.status_code == 403 + + +class TestAdminDeleteUser: + """DELETE /admin/users/{user_id}""" + + def test_admin_can_delete_user(self, client, mock_db): + """Admin should be able to delete another user.""" + admin = _make_admin_user() + mock_db.get_user_by_id.return_value = admin + mock_db.delete_user.return_value = True + + response = client.delete( + "/admin/users/2", + headers=_auth_header(admin), + ) + + assert response.status_code == 200 + assert "deleted" in response.json()["message"].lower() + mock_db.delete_user.assert_called_once_with(2) + + def test_admin_cannot_delete_self(self, client, mock_db): + """Admin should not be able to delete themselves.""" + admin = _make_admin_user() + mock_db.get_user_by_id.return_value = admin + + response = client.delete( + "/admin/users/1", + headers=_auth_header(admin), + ) + + assert response.status_code == 400 + assert "yourself" in response.json()["detail"].lower() + + def test_delete_nonexistent_user_returns_404(self, client, mock_db): + """Deleting a user that does not exist should return 404.""" + admin = _make_admin_user() + mock_db.get_user_by_id.return_value = admin + mock_db.delete_user.return_value = False + + response = client.delete( + "/admin/users/999", + headers=_auth_header(admin), + ) + + assert response.status_code == 404 + assert "not found" in response.json()["detail"].lower() + + def test_regular_user_cannot_delete_user(self, client, mock_db): + """Non-admin user should receive 403 when trying to delete users.""" + user = _make_regular_user() + mock_db.get_user_by_id.return_value = user + + response = client.delete( + "/admin/users/1", + headers=_auth_header(user), + ) + + assert response.status_code == 403 + + def test_no_token_cannot_delete_user(self, client): + """Missing token should be rejected for delete endpoint.""" + response = client.delete("/admin/users/1") + assert response.status_code in (401, 403) + + +class TestEdgeCases: + """Additional edge-case tests for auth robustness.""" + + def test_register_invalid_email_returns_422(self, client, mock_db): + """Registration with an invalid email format should return 422.""" + response = client.post( + "/auth/register", + json={"email": "not-an-email", "password": "securepass123"}, + ) + + assert response.status_code == 422 + + def test_register_short_password_returns_422(self, client, mock_db): + """Registration with a password shorter than 8 chars should return 422.""" + response = client.post( + "/auth/register", + json={"email": "user@test.com", "password": "short"}, + ) + + assert response.status_code == 422 + + def test_register_missing_fields_returns_422(self, client, mock_db): + """Registration with missing fields should return 422.""" + response = client.post("/auth/register", json={}) + assert response.status_code == 422 + + def test_login_missing_fields_returns_422(self, client, mock_db): + """Login with missing fields should return 422.""" + response = client.post("/auth/login", json={"email": "user@test.com"}) + assert response.status_code == 422 + + def test_malformed_token_returns_401(self, client, mock_db): + """A completely malformed token string should return 401.""" + response = client.get( + "/auth/me", + headers={"Authorization": "Bearer not.a.valid.jwt.token"}, + ) + assert response.status_code == 401 + + def test_token_with_wrong_secret_returns_401(self, client, mock_db): + """A token signed with a different secret should return 401.""" + payload = { + "sub": "1", + "email": "user@test.com", + "role": "user", + "exp": datetime.now(timezone.utc) + timedelta(hours=1), + "type": "access", + } + wrong_secret_token = pyjwt.encode(payload, "wrong-secret", algorithm=JWT_ALGORITHM) + + response = client.get( + "/auth/me", + headers={"Authorization": f"Bearer {wrong_secret_token}"}, + ) + assert response.status_code == 401 + + def test_token_for_deleted_user_returns_401(self, client, mock_db): + """A valid token for a user no longer in the DB should return 401.""" + user = _make_regular_user() + mock_db.get_user_by_id.return_value = None # user was deleted + + response = client.get("/auth/me", headers=_auth_header(user)) + assert response.status_code == 401 + + def test_refresh_for_deleted_user_returns_401(self, client, mock_db): + """Refreshing a token for a deleted user should return 401.""" + user = _make_regular_user() + mock_db.get_user_by_id.return_value = None + refresh = create_refresh_token(user["id"], user["email"], user["role"]) + + response = client.post( + "/auth/refresh", json={"refresh_token": refresh} + ) + assert response.status_code == 401 + + def test_login_returns_decodable_tokens(self, client, mock_db): + """Tokens returned by login should be decodable and contain expected claims.""" + user = _make_regular_user() + mock_db.authenticate_user.return_value = user + + response = client.post( + "/auth/login", + json={"email": "user@test.com", "password": "correctpassword"}, + ) + + data = response.json() + access_payload = pyjwt.decode( + data["access_token"], JWT_SECRET, algorithms=[JWT_ALGORITHM] + ) + assert access_payload["sub"] == str(user["id"]) + assert access_payload["email"] == user["email"] + assert access_payload["type"] == "access" + + refresh_payload = pyjwt.decode( + data["refresh_token"], JWT_SECRET, algorithms=[JWT_ALGORITHM] + ) + assert refresh_payload["type"] == "refresh" -- 2.52.0 From d4d43cf9b8837b01443bf23e5fac479425dedc4a Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 06:08:02 +0000 Subject: [PATCH 03/15] Fix prose-invert to only apply in dark mode on Analysis page The prose-invert class was applied unconditionally, causing inverted (light) text in light mode within the AI analysis results section. Changed to dark:prose-invert so it only activates when dark mode is enabled. Note: The broader dark mode feature (issue #1605) is already fully implemented -- ThemeContext, toggle button, CSS variables, dark: variants across all pages. This fix addresses the only remaining unstyled element. Closes leeworks-agents/SPARC#1605 Co-Authored-By: Claude Opus 4.6 (1M context) --- frontend/src/pages/Analysis.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/pages/Analysis.tsx b/frontend/src/pages/Analysis.tsx index 7ec67f7..2f4fc35 100644 --- a/frontend/src/pages/Analysis.tsx +++ b/frontend/src/pages/Analysis.tsx @@ -159,7 +159,7 @@ export function Analysis() { -
+
{result.analysis}
-- 2.52.0 From 44a162056da1bcfacfe3e008a0d473754b7cc9a1 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:11:42 +0000 Subject: [PATCH 04/15] Add API tests for export endpoints (CSV and PDF) Covers GET /export/{company_name} and /export/{company_name}/pdf with 13 test cases: successful export, 404 on missing data, auth enforcement, filename sanitization, XML-special character handling in PDF, and multi-row output validation. Closes leeworks-agents/SPARC#1655 Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_export.py | 224 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 tests/test_export.py diff --git a/tests/test_export.py b/tests/test_export.py new file mode 100644 index 0000000..d0c856f --- /dev/null +++ b/tests/test_export.py @@ -0,0 +1,224 @@ +"""Tests for export endpoints: CSV and PDF export of analysis results. + +Covers issue #1655: +- GET /export/{company_name} (CSV export) +- GET /export/{company_name}/pdf (PDF export) + +All tests mock the database layer and use JWT auth fixtures from test_auth patterns. +""" + +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + +from SPARC.api import app +from SPARC.auth import create_access_token + + +@pytest.fixture +def client(): + """Create test client.""" + return TestClient(app) + + +@pytest.fixture(autouse=True) +def mock_db(): + """Mock the database client used by export and auth endpoints.""" + db = MagicMock() + + # Default: user exists for auth + db.get_user_by_id.return_value = { + "id": 1, + "email": "user@test.com", + "role": "user", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + # Mock get_conn for export queries + mock_cursor = MagicMock() + mock_conn = MagicMock() + mock_conn.cursor.return_value.__enter__ = MagicMock(return_value=mock_cursor) + mock_conn.cursor.return_value.__exit__ = MagicMock(return_value=False) + db.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + db.get_conn.return_value.__exit__ = MagicMock(return_value=False) + db._mock_cursor = mock_cursor + + with patch("SPARC.api.get_db_client", return_value=db), \ + patch("SPARC.auth.get_db_client", return_value=db): + yield db + + +def _auth_header(): + """Create an Authorization header with a valid access token.""" + token = create_access_token(1, "user@test.com", "user") + return {"Authorization": f"Bearer {token}"} + + +def _sample_rows(): + """Return sample llm_messages rows as tuples (matching cursor.fetchall format).""" + return [ + ( + "NVIDIA", + "company_analysis", + "anthropic/claude-3.5-sonnet", + "Strong AI patent portfolio with focus on GPU architectures.", + datetime(2025, 6, 15, 10, 30, 0), + ), + ( + "NVIDIA", + "patent_analysis", + "openai/gpt-4o", + "Patent US-12345678-B2 covers novel tensor core design.", + datetime(2025, 6, 14, 9, 0, 0), + ), + ] + + +class TestCSVExport: + """GET /export/{company_name} -- CSV export.""" + + def test_csv_export_success(self, client, mock_db): + """Valid company with results returns a CSV file.""" + mock_db._mock_cursor.fetchall.return_value = _sample_rows() + + response = client.get("/export/NVIDIA", headers=_auth_header()) + + assert response.status_code == 200 + assert response.headers["content-type"].startswith("text/csv") + assert "attachment" in response.headers.get("content-disposition", "") + assert "sparc_nvidia_export.csv" in response.headers["content-disposition"] + + # Verify CSV content (CSV uses \r\n line endings) + lines = response.text.strip().split("\n") + assert len(lines) == 3 # header + 2 data rows + assert lines[0].strip() == "company_name,analysis_type,model,analysis,timestamp" + assert "NVIDIA" in lines[1] + assert "company_analysis" in lines[1] + + def test_csv_export_no_results_returns_404(self, client, mock_db): + """Unknown company returns 404.""" + mock_db._mock_cursor.fetchall.return_value = [] + + response = client.get("/export/nonexistent", headers=_auth_header()) + + assert response.status_code == 404 + assert "No analysis results found" in response.json()["detail"] + + def test_csv_export_unauthenticated_returns_401(self, client): + """Request without token returns 401.""" + response = client.get("/export/NVIDIA") + assert response.status_code == 401 + + def test_csv_export_invalid_token_returns_401(self, client): + """Request with invalid token returns 401.""" + response = client.get( + "/export/NVIDIA", + headers={"Authorization": "Bearer invalid.token.here"}, + ) + assert response.status_code == 401 + + def test_csv_export_filename_sanitization(self, client, mock_db): + """Company names with spaces get sanitized in the filename.""" + mock_db._mock_cursor.fetchall.return_value = [ + ( + "Tesla Motors", + "company_analysis", + "anthropic/claude-3.5-sonnet", + "EV patent portfolio analysis.", + datetime(2025, 6, 15, 10, 0, 0), + ), + ] + + response = client.get("/export/Tesla Motors", headers=_auth_header()) + + assert response.status_code == 200 + assert "tesla_motors" in response.headers["content-disposition"] + + def test_csv_export_single_row(self, client, mock_db): + """Single analysis result produces valid CSV with one data row.""" + mock_db._mock_cursor.fetchall.return_value = [_sample_rows()[0]] + + response = client.get("/export/NVIDIA", headers=_auth_header()) + + assert response.status_code == 200 + lines = response.text.strip().split("\n") + assert len(lines) == 2 # header + 1 data row + + +class TestPDFExport: + """GET /export/{company_name}/pdf -- PDF report export.""" + + def test_pdf_export_success(self, client, mock_db): + """Valid company with results returns a PDF file.""" + mock_db._mock_cursor.fetchall.return_value = _sample_rows() + + response = client.get("/export/NVIDIA/pdf", headers=_auth_header()) + + assert response.status_code == 200 + assert response.headers["content-type"] == "application/pdf" + assert "attachment" in response.headers.get("content-disposition", "") + # PDF files start with %PDF + assert response.content[:4] == b"%PDF" + + def test_pdf_export_no_results_returns_404(self, client, mock_db): + """Unknown company returns 404.""" + mock_db._mock_cursor.fetchall.return_value = [] + + response = client.get("/export/nonexistent/pdf", headers=_auth_header()) + + assert response.status_code == 404 + assert "No analysis results found" in response.json()["detail"] + + def test_pdf_export_unauthenticated_returns_401(self, client): + """Request without token returns 401.""" + response = client.get("/export/NVIDIA/pdf") + assert response.status_code == 401 + + def test_pdf_export_invalid_token_returns_401(self, client): + """Request with invalid token returns 401.""" + response = client.get( + "/export/NVIDIA/pdf", + headers={"Authorization": "Bearer invalid.token.here"}, + ) + assert response.status_code == 401 + + def test_pdf_export_filename_contains_date(self, client, mock_db): + """PDF filename includes the analysis date.""" + mock_db._mock_cursor.fetchall.return_value = _sample_rows() + + response = client.get("/export/NVIDIA/pdf", headers=_auth_header()) + + assert response.status_code == 200 + disposition = response.headers["content-disposition"] + assert "nvidia-analysis-" in disposition + assert ".pdf" in disposition + + def test_pdf_export_special_chars_in_response(self, client, mock_db): + """Analysis text with XML-special chars (<, >, &) does not break PDF generation.""" + rows = [ + ( + "TestCo", + "company_analysis", + "anthropic/claude-3.5-sonnet", + "Revenue > $1B & growth <20% for Q4. Test escaping.", + datetime(2025, 6, 15, 10, 0, 0), + ), + ] + mock_db._mock_cursor.fetchall.return_value = rows + + response = client.get("/export/TestCo/pdf", headers=_auth_header()) + + assert response.status_code == 200 + assert response.content[:4] == b"%PDF" + + def test_pdf_export_multiple_analyses(self, client, mock_db): + """Multiple analysis records produce a valid PDF with content.""" + mock_db._mock_cursor.fetchall.return_value = _sample_rows() + + response = client.get("/export/NVIDIA/pdf", headers=_auth_header()) + + assert response.status_code == 200 + # PDF should have reasonable size (more than just headers) + assert len(response.content) > 500 -- 2.52.0 From fc942b2aa44e1009fa4a1413946302ed96bc7bad Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:14:29 +0000 Subject: [PATCH 05/15] Add tests for tracked company admin endpoints and scheduler integration 20 test cases covering: - GET/POST/DELETE /admin/tracked endpoints with admin auth enforcement - GET /admin/alerts with limit parameter and auth - scheduler.run_scheduled_analysis() for multi-company analysis, alert triggering on significant patent count changes, graceful failure handling Closes leeworks-agents/SPARC#1656 Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_tracked_companies.py | 388 ++++++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 tests/test_tracked_companies.py diff --git a/tests/test_tracked_companies.py b/tests/test_tracked_companies.py new file mode 100644 index 0000000..d1e96fa --- /dev/null +++ b/tests/test_tracked_companies.py @@ -0,0 +1,388 @@ +"""Tests for tracked company admin endpoints and scheduler integration. + +Covers issue #1656: +- GET /admin/tracked (list tracked companies) +- POST /admin/tracked (add a tracked company) +- DELETE /admin/tracked/{company_name} (remove a tracked company) +- GET /admin/alerts (list alerts) +- scheduler.run_scheduled_analysis() integration + +All tests mock the database layer and use JWT auth fixtures. +""" + +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch, call + +import pytest +from fastapi.testclient import TestClient + +from SPARC.api import app +from SPARC.auth import create_access_token + + +@pytest.fixture +def client(): + """Create test client.""" + return TestClient(app) + + +@pytest.fixture(autouse=True) +def mock_db(): + """Mock the database client used by admin and auth endpoints.""" + db = MagicMock() + + # Default admin user for auth + db.get_user_by_id.return_value = { + "id": 1, + "email": "admin@test.com", + "role": "admin", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + with patch("SPARC.api.get_db_client", return_value=db), \ + patch("SPARC.auth.get_db_client", return_value=db): + yield db + + +def _admin_header(): + """Create an Authorization header with a valid admin access token.""" + token = create_access_token(1, "admin@test.com", "admin") + return {"Authorization": f"Bearer {token}"} + + +def _user_header(): + """Create an Authorization header with a regular user access token.""" + token = create_access_token(2, "user@test.com", "user") + return {"Authorization": f"Bearer {token}"} + + +# ---------- GET /admin/tracked ---------- + +class TestListTrackedCompanies: + """GET /admin/tracked""" + + def test_list_tracked_returns_companies(self, client, mock_db): + """Admin can list tracked companies.""" + mock_db.list_tracked_companies.return_value = [ + {"company_name": "NVIDIA", "last_patent_count": 120, "last_analyzed": "2025-06-15"}, + {"company_name": "AMD", "last_patent_count": 80, "last_analyzed": "2025-06-14"}, + ] + + response = client.get("/admin/tracked", headers=_admin_header()) + + assert response.status_code == 200 + data = response.json() + assert len(data) == 2 + assert data[0]["company_name"] == "NVIDIA" + + def test_list_tracked_empty(self, client, mock_db): + """Returns empty list when no companies are tracked.""" + mock_db.list_tracked_companies.return_value = [] + + response = client.get("/admin/tracked", headers=_admin_header()) + + assert response.status_code == 200 + assert response.json() == [] + + def test_list_tracked_requires_admin(self, client, mock_db): + """Regular user cannot access tracked companies list.""" + mock_db.get_user_by_id.return_value = { + "id": 2, + "email": "user@test.com", + "role": "user", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + response = client.get("/admin/tracked", headers=_user_header()) + + assert response.status_code == 403 + + def test_list_tracked_unauthenticated(self, client): + """Unauthenticated request returns 401.""" + response = client.get("/admin/tracked") + assert response.status_code == 401 + + +# ---------- POST /admin/tracked ---------- + +class TestAddTrackedCompany: + """POST /admin/tracked""" + + def test_add_tracked_company_success(self, client, mock_db): + """Admin can add a company to tracking.""" + mock_db.add_tracked_company.return_value = { + "company_name": "Intel", + "last_patent_count": 0, + "last_analyzed": None, + } + + response = client.post( + "/admin/tracked", + json={"company_name": "Intel"}, + headers=_admin_header(), + ) + + assert response.status_code == 200 + data = response.json() + assert data["company_name"] == "Intel" + mock_db.add_tracked_company.assert_called_once_with("Intel") + + def test_add_duplicate_returns_409(self, client, mock_db): + """Adding an already-tracked company returns 409.""" + mock_db.add_tracked_company.return_value = None + + response = client.post( + "/admin/tracked", + json={"company_name": "NVIDIA"}, + headers=_admin_header(), + ) + + assert response.status_code == 409 + assert "already tracked" in response.json()["detail"].lower() + + def test_add_tracked_requires_admin(self, client, mock_db): + """Regular user cannot add tracked companies.""" + mock_db.get_user_by_id.return_value = { + "id": 2, + "email": "user@test.com", + "role": "user", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + response = client.post( + "/admin/tracked", + json={"company_name": "Intel"}, + headers=_user_header(), + ) + + assert response.status_code == 403 + + def test_add_tracked_empty_name_rejected(self, client): + """Empty company name is rejected by validation.""" + response = client.post( + "/admin/tracked", + json={"company_name": ""}, + headers=_admin_header(), + ) + + assert response.status_code == 422 # Pydantic validation error + + +# ---------- DELETE /admin/tracked/{company_name} ---------- + +class TestRemoveTrackedCompany: + """DELETE /admin/tracked/{company_name}""" + + def test_remove_tracked_company_success(self, client, mock_db): + """Admin can remove a tracked company.""" + mock_db.remove_tracked_company.return_value = True + + response = client.delete( + "/admin/tracked/NVIDIA", + headers=_admin_header(), + ) + + assert response.status_code == 200 + assert "Stopped tracking" in response.json()["message"] + mock_db.remove_tracked_company.assert_called_once_with("NVIDIA") + + def test_remove_nonexistent_returns_404(self, client, mock_db): + """Removing a non-tracked company returns 404.""" + mock_db.remove_tracked_company.return_value = False + + response = client.delete( + "/admin/tracked/UnknownCorp", + headers=_admin_header(), + ) + + assert response.status_code == 404 + assert "not found" in response.json()["detail"].lower() + + def test_remove_tracked_requires_admin(self, client, mock_db): + """Regular user cannot remove tracked companies.""" + mock_db.get_user_by_id.return_value = { + "id": 2, + "email": "user@test.com", + "role": "user", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + response = client.delete( + "/admin/tracked/NVIDIA", + headers=_user_header(), + ) + + assert response.status_code == 403 + + +# ---------- GET /admin/alerts ---------- + +class TestListAlerts: + """GET /admin/alerts""" + + def test_list_alerts_returns_data(self, client, mock_db): + """Admin can list alerts.""" + mock_db.list_alerts.return_value = [ + { + "id": 1, + "company_name": "NVIDIA", + "alert_type": "patent_count_change", + "message": "Patent count increased by 25%", + "created_at": "2025-06-15T10:00:00Z", + }, + ] + + response = client.get("/admin/alerts", headers=_admin_header()) + + assert response.status_code == 200 + data = response.json() + assert len(data) == 1 + assert data[0]["alert_type"] == "patent_count_change" + + def test_list_alerts_with_limit(self, client, mock_db): + """Custom limit parameter is passed to the database.""" + mock_db.list_alerts.return_value = [] + + response = client.get("/admin/alerts?limit=10", headers=_admin_header()) + + assert response.status_code == 200 + mock_db.list_alerts.assert_called_once_with(limit=10) + + def test_list_alerts_requires_admin(self, client, mock_db): + """Regular user cannot access alerts.""" + mock_db.get_user_by_id.return_value = { + "id": 2, + "email": "user@test.com", + "role": "user", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + response = client.get("/admin/alerts", headers=_user_header()) + + assert response.status_code == 403 + + +# ---------- Scheduler integration ---------- + +class TestSchedulerIntegration: + """Tests for scheduler.run_scheduled_analysis().""" + + def test_no_tracked_companies_skips_analysis(self): + """Scheduler does nothing when no companies are tracked.""" + mock_db = MagicMock() + mock_db.list_tracked_companies.return_value = [] + + with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + patch("SPARC.scheduler.CompanyAnalyzer") as mock_analyzer_cls: + from SPARC.scheduler import run_scheduled_analysis + run_scheduled_analysis() + + mock_analyzer_cls.assert_not_called() + + def test_scheduler_analyzes_each_tracked_company(self): + """Scheduler runs analysis for every tracked company.""" + mock_db = MagicMock() + mock_db.list_tracked_companies.return_value = [ + {"company_name": "NVIDIA", "last_patent_count": 100}, + {"company_name": "AMD", "last_patent_count": 50}, + ] + + mock_result_nvidia = MagicMock(success=True, patent_count=110) + mock_result_amd = MagicMock(success=True, patent_count=55) + mock_analyzer = MagicMock() + mock_analyzer._analyze_company_safe.side_effect = [mock_result_nvidia, mock_result_amd] + + with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): + from SPARC.scheduler import run_scheduled_analysis + run_scheduled_analysis() + + assert mock_analyzer._analyze_company_safe.call_count == 2 + mock_db.update_tracked_company.assert_any_call("NVIDIA", 110) + mock_db.update_tracked_company.assert_any_call("AMD", 55) + + def test_scheduler_triggers_alert_on_significant_change(self): + """Scheduler stores an alert when patent count changes significantly.""" + mock_db = MagicMock() + mock_db.list_tracked_companies.return_value = [ + {"company_name": "Tesla", "last_patent_count": 100}, + ] + + mock_result = MagicMock(success=True, patent_count=130) # 30% increase + mock_analyzer = MagicMock() + mock_analyzer._analyze_company_safe.return_value = mock_result + + with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): + from SPARC.scheduler import run_scheduled_analysis + run_scheduled_analysis() + + mock_db.store_alert.assert_called_once() + alert_kwargs = mock_db.store_alert.call_args + assert alert_kwargs[1]["company_name"] == "Tesla" + assert alert_kwargs[1]["alert_type"] == "patent_count_change" + assert alert_kwargs[1]["old_value"] == 100 + assert alert_kwargs[1]["new_value"] == 130 + + def test_scheduler_no_alert_for_small_change(self): + """Scheduler does not alert when change is below threshold.""" + mock_db = MagicMock() + mock_db.list_tracked_companies.return_value = [ + {"company_name": "Intel", "last_patent_count": 100}, + ] + + mock_result = MagicMock(success=True, patent_count=105) # 5% increase + mock_analyzer = MagicMock() + mock_analyzer._analyze_company_safe.return_value = mock_result + + with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): + from SPARC.scheduler import run_scheduled_analysis + run_scheduled_analysis() + + mock_db.store_alert.assert_not_called() + + def test_scheduler_handles_analysis_failure(self): + """Scheduler continues when one company fails analysis.""" + mock_db = MagicMock() + mock_db.list_tracked_companies.return_value = [ + {"company_name": "FailCo", "last_patent_count": 50}, + {"company_name": "SuccessCo", "last_patent_count": 30}, + ] + + mock_fail_result = MagicMock(success=False, error="API timeout") + mock_ok_result = MagicMock(success=True, patent_count=35) + mock_analyzer = MagicMock() + mock_analyzer._analyze_company_safe.side_effect = [mock_fail_result, mock_ok_result] + + with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): + from SPARC.scheduler import run_scheduled_analysis + run_scheduled_analysis() + + # FailCo should not get updated, SuccessCo should + mock_db.update_tracked_company.assert_called_once_with("SuccessCo", 35) + + def test_scheduler_handles_exception_in_analysis(self): + """Scheduler continues even when analysis raises an exception.""" + mock_db = MagicMock() + mock_db.list_tracked_companies.return_value = [ + {"company_name": "CrashCo", "last_patent_count": 10}, + {"company_name": "OKCo", "last_patent_count": 20}, + ] + + mock_ok_result = MagicMock(success=True, patent_count=22) + mock_analyzer = MagicMock() + mock_analyzer._analyze_company_safe.side_effect = [ + RuntimeError("unexpected error"), + mock_ok_result, + ] + + with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): + from SPARC.scheduler import run_scheduled_analysis + run_scheduled_analysis() + + # OKCo should still be processed + mock_db.update_tracked_company.assert_called_once_with("OKCo", 22) + mock_db.close.assert_called_once() -- 2.52.0 From 2eabb1d704d3d80e1c1d0a3266997c4ca27f07b0 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:15:34 +0000 Subject: [PATCH 06/15] Add webhook integration tests covering retry logic and Slack/Discord payloads 22 test cases covering: - Slack/Discord URL detection - Generic vs Slack payload formatting - Exponential backoff retry logic with network/timeout error handling - Multi-URL dispatch with format auto-detection - notify_job_completed() and notify_alert() helpers Closes leeworks-agents/SPARC#1657 Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_webhooks.py | 280 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 tests/test_webhooks.py diff --git a/tests/test_webhooks.py b/tests/test_webhooks.py new file mode 100644 index 0000000..9950030 --- /dev/null +++ b/tests/test_webhooks.py @@ -0,0 +1,280 @@ +"""Tests for webhook notification system: retry logic and Slack/Discord payload format. + +Covers issue #1657: +- Retry logic with exponential backoff in _send_with_retry +- Slack/Discord payload formatting in _build_payload +- Generic HTTP POST payload formatting +- notify() dispatching to multiple URLs +- notify_job_completed() and notify_alert() convenience helpers +""" + +from datetime import datetime +from unittest.mock import MagicMock, patch, call + +import pytest +import requests + +from SPARC.webhooks import ( + MAX_RETRIES, + _build_payload, + _is_slack_url, + _send_with_retry, + notify, + notify_alert, + notify_job_completed, +) + + +class TestIsSlackUrl: + """Tests for Slack/Discord URL detection.""" + + def test_slack_webhook_url(self): + assert _is_slack_url("https://hooks.slack.com/services/T00/B00/xxx") is True + + def test_discord_webhook_url(self): + assert _is_slack_url("https://discord.com/api/webhooks/123/abc") is True + + def test_generic_url(self): + assert _is_slack_url("https://example.com/webhook") is False + + def test_empty_url(self): + assert _is_slack_url("") is False + + +class TestBuildPayload: + """Tests for payload construction.""" + + def test_generic_payload_structure(self): + """Generic payload includes event type, timestamp, and data.""" + payload = _build_payload("job_completed", {"job_id": "abc123"}) + + assert payload["event"] == "job_completed" + assert payload["job_id"] == "abc123" + assert "timestamp" in payload + # Timestamp should be ISO format ending with Z + assert payload["timestamp"].endswith("Z") + + def test_slack_payload_wraps_in_text(self): + """Slack payload wraps content in a 'text' field.""" + payload = _build_payload("patent_alert", {"company_name": "NVIDIA"}, slack=True) + + assert "text" in payload + assert "patent_alert" in payload["text"] + assert "NVIDIA" in payload["text"] + # Slack payload should NOT have the event/timestamp at top level + assert "event" not in payload + assert "timestamp" not in payload + + def test_generic_payload_does_not_have_text_field(self): + """Non-Slack payload does not wrap in text.""" + payload = _build_payload("job_completed", {"status": "done"}) + + assert "text" not in payload + assert payload["status"] == "done" + + def test_slack_payload_contains_bold_header(self): + """Slack payload starts with bold event header using Slack markdown.""" + payload = _build_payload("job_completed", {"count": 5}, slack=True) + + assert payload["text"].startswith("*[SPARC] job_completed*") + + def test_payload_merges_all_data_keys(self): + """All data keys are included in the generic payload.""" + data = {"key1": "val1", "key2": 42, "key3": True} + payload = _build_payload("test_event", data) + + assert payload["key1"] == "val1" + assert payload["key2"] == 42 + assert payload["key3"] is True + + +class TestSendWithRetry: + """Tests for retry logic in _send_with_retry.""" + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_success_on_first_attempt(self, mock_post, mock_sleep): + """Successful delivery on first attempt, no retries.""" + mock_post.return_value = MagicMock(status_code=200) + + result = _send_with_retry("https://example.com/hook", {"event": "test"}) + + assert result is True + mock_post.assert_called_once() + mock_sleep.assert_not_called() + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_success_on_second_attempt(self, mock_post, mock_sleep): + """Fails first, succeeds on retry.""" + mock_post.side_effect = [ + MagicMock(status_code=500), + MagicMock(status_code=200), + ] + + result = _send_with_retry("https://example.com/hook", {"event": "test"}) + + assert result is True + assert mock_post.call_count == 2 + mock_sleep.assert_called_once() + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_all_retries_exhausted(self, mock_post, mock_sleep): + """Returns False after all retries fail.""" + mock_post.return_value = MagicMock(status_code=500) + + result = _send_with_retry("https://example.com/hook", {"event": "test"}) + + assert result is False + assert mock_post.call_count == MAX_RETRIES + assert mock_sleep.call_count == MAX_RETRIES - 1 + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_exponential_backoff_timing(self, mock_post, mock_sleep): + """Backoff wait times follow exponential pattern (2^attempt).""" + mock_post.return_value = MagicMock(status_code=500) + + _send_with_retry("https://example.com/hook", {"event": "test"}) + + # With BACKOFF_BASE=2: attempt 1 -> sleep(2), attempt 2 -> sleep(4) + expected_waits = [call(2 ** i) for i in range(1, MAX_RETRIES)] + assert mock_sleep.call_args_list == expected_waits + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_network_error_triggers_retry(self, mock_post, mock_sleep): + """Network exceptions trigger retry, not immediate failure.""" + mock_post.side_effect = [ + requests.ConnectionError("Connection refused"), + MagicMock(status_code=200), + ] + + result = _send_with_retry("https://example.com/hook", {"event": "test"}) + + assert result is True + assert mock_post.call_count == 2 + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_timeout_error_triggers_retry(self, mock_post, mock_sleep): + """Timeout exceptions trigger retry.""" + mock_post.side_effect = [ + requests.Timeout("Request timed out"), + MagicMock(status_code=200), + ] + + result = _send_with_retry("https://example.com/hook", {"event": "test"}) + + assert result is True + assert mock_post.call_count == 2 + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_2xx_status_codes_accepted(self, mock_post, mock_sleep): + """Any 2xx status code is treated as success.""" + mock_post.return_value = MagicMock(status_code=204) + + result = _send_with_retry("https://example.com/hook", {"event": "test"}) + + assert result is True + mock_post.assert_called_once() + + @patch("SPARC.webhooks.time.sleep") + @patch("SPARC.webhooks.requests.post") + def test_posts_json_payload(self, mock_post, mock_sleep): + """Payload is sent as JSON with correct timeout.""" + mock_post.return_value = MagicMock(status_code=200) + payload = {"event": "test", "data": "value"} + + _send_with_retry("https://example.com/hook", payload) + + mock_post.assert_called_once_with( + "https://example.com/hook", json=payload, timeout=10 + ) + + +class TestNotify: + """Tests for the notify() dispatcher.""" + + @patch("SPARC.webhooks._send_with_retry") + @patch("SPARC.webhooks.WEBHOOK_URLS", ["https://example.com/hook1", "https://example.com/hook2"]) + def test_dispatches_to_all_urls(self, mock_send): + """notify() sends to every configured webhook URL.""" + mock_send.return_value = True + + notify("job_completed", {"job_id": "test123"}) + + assert mock_send.call_count == 2 + + @patch("SPARC.webhooks._send_with_retry") + @patch("SPARC.webhooks.WEBHOOK_URLS", []) + def test_no_urls_configured_returns_immediately(self, mock_send): + """No-op when no webhook URLs are configured.""" + notify("job_completed", {"job_id": "test123"}) + + mock_send.assert_not_called() + + @patch("SPARC.webhooks._send_with_retry") + @patch("SPARC.webhooks.WEBHOOK_URLS", [ + "https://hooks.slack.com/services/T00/B00/xxx", + "https://example.com/generic", + ]) + def test_slack_url_gets_slack_payload(self, mock_send): + """Slack URLs receive Slack-formatted payloads, others get generic.""" + mock_send.return_value = True + + notify("test_event", {"key": "val"}) + + # First call (Slack URL) should have "text" key + slack_payload = mock_send.call_args_list[0][0][1] + assert "text" in slack_payload + + # Second call (generic URL) should have "event" key + generic_payload = mock_send.call_args_list[1][0][1] + assert "event" in generic_payload + assert generic_payload["event"] == "test_event" + + +class TestNotifyJobCompleted: + """Tests for notify_job_completed() convenience function.""" + + @patch("SPARC.webhooks.notify") + def test_sends_correct_event_and_data(self, mock_notify): + """Job completion sends proper event type and summary.""" + notify_job_completed( + job_id="batch-001", + status="completed", + total_companies=10, + successful=8, + failed=2, + ) + + mock_notify.assert_called_once() + event, data = mock_notify.call_args[0] + assert event == "job_completed" + assert data["job_id"] == "batch-001" + assert data["successful"] == 8 + assert data["failed"] == 2 + assert "8/10" in data["summary"] + + +class TestNotifyAlert: + """Tests for notify_alert() convenience function.""" + + @patch("SPARC.webhooks.notify") + def test_sends_correct_event_and_data(self, mock_notify): + """Alert notification sends patent_alert event type.""" + notify_alert( + company_name="NVIDIA", + alert_type="patent_count_change", + message="Patent count increased by 30%", + ) + + mock_notify.assert_called_once() + event, data = mock_notify.call_args[0] + assert event == "patent_alert" + assert data["company_name"] == "NVIDIA" + assert data["alert_type"] == "patent_count_change" + assert "30%" in data["message"] -- 2.52.0 From 417b7ab31e98fd6a98424bdfa56f176b90cfbe6b Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:16:54 +0000 Subject: [PATCH 07/15] Refactor scheduler.py to use the application-level pooled DatabaseClient Replace the per-invocation DatabaseClient creation in run_scheduled_analysis() with the shared pooled client from SPARC.auth.get_db_client(). This avoids creating a new database connection on every scheduler tick, which could exhaust the connection pool under load. Key changes: - Import get_db_client from SPARC.auth instead of DatabaseClient - Remove manual connect/initialize_schema/close calls - Remove unused SPARC.config import Closes leeworks-agents/SPARC#1658 Co-Authored-By: Claude Opus 4.6 (1M context) --- SPARC/scheduler.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/SPARC/scheduler.py b/SPARC/scheduler.py index 5af3940..4428bfd 100644 --- a/SPARC/scheduler.py +++ b/SPARC/scheduler.py @@ -2,14 +2,17 @@ Uses APScheduler to periodically re-analyze tracked companies and detect significant changes in patent counts. + +The scheduler reuses the application-level pooled DatabaseClient +(from ``SPARC.auth``) instead of creating its own connection, which +avoids exhausting the database connection pool under load. """ import logging import os -from SPARC import config from SPARC.analyzer import CompanyAnalyzer -from SPARC.database import DatabaseClient +from SPARC.auth import get_db_client logger = logging.getLogger(__name__) @@ -21,10 +24,13 @@ CHANGE_THRESHOLD_PERCENT = int(os.getenv("CHANGE_THRESHOLD_PERCENT", "20")) def run_scheduled_analysis() -> None: - """Re-analyze all tracked companies and check for significant changes.""" - db = DatabaseClient(config.database_url) - db.connect() - db.initialize_schema() + """Re-analyze all tracked companies and check for significant changes. + + Uses the shared pooled DatabaseClient from ``SPARC.auth.get_db_client()`` + rather than creating a disposable connection, so the scheduler participates + in the same connection pool as the rest of the application. + """ + db = get_db_client() tracked = db.list_tracked_companies() if not tracked: @@ -74,7 +80,6 @@ def run_scheduled_analysis() -> None: except Exception as e: logger.error("Error analyzing tracked company %s: %s", name, e) - db.close() logger.info("Scheduled analysis complete") -- 2.52.0 From 4cb1a6ed218e8511385fac88e539d5646ede778a Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:18:22 +0000 Subject: [PATCH 08/15] Update ROADMAP.md to reflect completed work and add next-horizon items Move all completed items (security hardening, structured logging, dark mode, export, webhooks, scheduled analysis, multi-model, trend charts, CI, etc.) into a new Completed section. Reorganize remaining P1/P2/P3 items to reflect current priorities. Add new next-horizon items: historical diffing, patent classification tagging, user API keys, batch export, and multi-tenant support. Closes leeworks-agents/SPARC#1659 Co-Authored-By: Claude Opus 4.6 (1M context) --- ROADMAP.md | 194 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 118 insertions(+), 76 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 42b571a..5b177d9 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -7,86 +7,131 @@ Semiconductor Patent & Analytics Report Core -- development priorities. SPARC is a patent analysis platform with a working end-to-end pipeline: Python/FastAPI backend, React/TypeScript frontend, PostgreSQL for persistence and caching, Docker Compose for local development, and Gitea Actions CI/CD for -image builds. Core features (patent retrieval via SerpAPI, PDF parsing, LLM -analysis via OpenRouter/Claude, batch processing, JWT authentication, analytics -dashboard) are all implemented and functional. +image builds and testing. Core features include patent retrieval via SerpAPI, +PDF parsing, LLM analysis via OpenRouter (multi-model: Claude, GPT-4o, Gemini, +Llama), batch processing, JWT authentication, analytics dashboard with patent +trend charts, scheduled recurring analysis with alerting, webhook notifications +(Slack/Discord), CSV and PDF export, S3/MinIO storage, side-by-side company +comparison, and dark mode. + +--- + +## Completed + +Items that have been implemented and merged into main. + +### Security hardening + +- ~~Rotate default JWT secret.~~ Startup check refuses to start with the + default secret in non-development environments. +- ~~CORS allow-origins are hardcoded.~~ Allowed origins are now configurable + via environment variable. +- ~~Database credentials in docker-compose.yml.~~ Compose references `.env` + for sensitive values. + +### Error handling and resilience + +- ~~`get_db_client()` creates a new `DatabaseClient` on every call.~~ Refactored + to a shared pooled singleton initialized at startup. +- ~~No rate limiting on auth endpoints.~~ Rate limiting middleware added to + `/auth/login` and `/auth/register`. + +### Test coverage + +- ~~API tests bypass authentication.~~ JWT auth integration tests added (33 + cases covering registration, login, protected routes, token refresh, and + admin-only endpoints). +- ~~No test stage in CI.~~ Gitea Actions workflow now runs `pytest` and gates + the build. +- ~~No linting or type checking in CI.~~ `ruff` (Python) and `tsc --noEmit` + (TypeScript) added to CI pipeline. + +### Backend + +- ~~Add structured logging.~~ Python `logging` module used throughout. +- ~~Make LLM model configurable.~~ `MODEL` environment variable accepted; + multi-model support with per-analysis selection (GPT-4o, Gemini, Claude, + Llama). +- ~~SERP cache TTL hardcoded.~~ `SERP_CACHE_TTL_HOURS` exposed as env var. +- ~~Patent PDF storage.~~ S3/MinIO object storage backend added alongside + local filesystem. Volume mount requirement documented. +- ~~`analyze_single_patent` assumes local file.~~ Auto-download from cached + metadata link integrated. +- ~~`Patent.patent_id` typed as `int`.~~ Fixed to `str`. + +### Frontend + +- ~~No loading/error states.~~ Skeleton loaders and error states added to + Batch and Analytics pages. +- ~~No dark mode.~~ Full dark mode support with theme-aware chart colors. +- ~~Missing lockfile.~~ `package-lock.json` committed. + +### Features (formerly P3) + +- ~~Export analysis reports.~~ CSV and PDF export endpoints implemented. +- ~~Comparison view.~~ Side-by-side company patent portfolio comparison added. +- ~~Scheduled/recurring analysis.~~ APScheduler-based periodic re-analysis + with configurable interval and change-threshold alerting. +- ~~Webhook/notification support.~~ Slack, Discord, and generic HTTP POST + webhooks with retry logic. +- ~~Multi-model support.~~ Model picker in Analysis and Batch pages; backend + allow-list validation. +- ~~Patent trend charts.~~ Filing frequency and category distribution + visualizations added to Analytics page. +- ~~OpenAPI client generation.~~ TypeScript API client auto-generated from + FastAPI spec with CI freshness check. --- ## P1 -- High Priority -These items address correctness, security, and reliability gaps that should be +These items address correctness, reliability, and coverage gaps that should be resolved before broader production use. -### Security hardening +### Resilience -- **Rotate default JWT secret.** `auth.py` ships a fallback - `sparc-secret-key-change-in-production` that will be used if `JWT_SECRET` is - unset. Add a startup check that refuses to start with the default secret in - non-development environments. -- **CORS allow-origins are hardcoded.** `api.py` only permits - `localhost:3000` and `localhost:5173`. Make the allowed origins configurable - via environment variable so the dashboard works when deployed behind a real - domain. -- **Database credentials in docker-compose.yml.** The compose file embeds - `postgres:postgres` in plain text. Reference a `.env` file or Docker secrets - instead. +- **`_jobs` dict is in-memory only.** Job state is lost on API restart. + Persist job status in PostgreSQL or Redis so async batch results survive + restarts. -### Error handling and resilience +### Test coverage gaps -- **`get_db_client()` in `auth.py` creates a new `DatabaseClient` on every - call.** This bypasses the connection pool and can exhaust database - connections under load. Refactor to share a single pooled client. -- **`_jobs` dict is in-memory only.** Job state is lost on API restart. Persist - job status in PostgreSQL or Redis so async batch results survive restarts. -- **No rate limiting on auth endpoints.** `/auth/login` and `/auth/register` - are unprotected against brute-force or abuse. Add rate limiting middleware. - -### Test coverage for auth and admin - -- The existing API tests (`tests/test_api.py`) bypass authentication entirely. - Add tests that exercise the JWT flow: registration, login, protected-route - access, token refresh, and admin-only endpoints. +- **Export endpoint tests.** The CSV and PDF export endpoints (`/export/`) + lack test coverage. Add tests covering auth, success, 404, and edge cases. + *(Issue #1655)* +- **Tracked company admin endpoint tests.** The `/admin/tracked` CRUD + endpoints and scheduler integration lack test coverage. *(Issue #1656)* --- ## P2 -- Medium Priority -Improvements to usability, performance, and developer experience. +Improvements to reliability, test coverage, and code quality. -### Backend +### Test coverage -- **Add structured logging.** Replace `print()` calls throughout `analyzer.py`, - `serp_api.py`, and `llm.py` with Python `logging` so log levels and - formatting are consistent. -- **Make LLM model configurable.** `llm.py` hardcodes - `anthropic/claude-3.5-sonnet`. Accept a `MODEL` environment variable to allow - switching models without code changes. -- **SERP cache TTL is hardcoded to 24 hours.** Expose `SERP_CACHE_TTL_HOURS` - as an environment variable in `config.py`. -- **Patent PDF storage.** PDFs are saved to a local `patents/` directory. For - containerized deployments, consider object storage (S3/MinIO) or at minimum - document the volume mount requirement more prominently. -- **`analyze_single_patent` assumes local file path.** The method constructs - `patents/{patent_id}.pdf` and reads from disk, but does not download the PDF - first. Either integrate the download step or document the prerequisite. -- **`Patent.patent_id` typed as `int` in `types.py` but used as `str` - everywhere.** Fix the type annotation to `str`. +- **Webhook integration tests.** The retry logic, Slack/Discord payload + format, and multi-URL dispatch in `webhooks.py` need test coverage. + *(Issue #1657)* +- **S3/MinIO storage backend tests.** `storage.py` has local filesystem tests + but no unit tests for the S3 backend (read, write, exists, delete, + error handling). *(Issue #1660)* +- **`analyze_single_patent` auto-download path tests.** The auto-download + fallback (cache lookup, PDF download, FileNotFoundError) in + `analyzer.py` lacks test coverage. *(Issue #1661)* -### Frontend +### Code quality -- **No loading/error states on several pages.** The Batch and Analytics pages - would benefit from skeleton loaders and user-friendly error messages. -- **No dark mode.** Tailwind is configured but no dark variant is applied. -- **Missing `package-lock.json` or `pnpm-lock.yaml`.** The frontend has no - lockfile committed, leading to non-reproducible builds. +- **Scheduler creates its own DatabaseClient.** `scheduler.py` bypasses the + application-level pooled client, creating a new connection on every tick. + Refactor to use `get_db_client()`. *(Issue #1658)* -### CI/CD +### API improvements -- **No test stage in the Gitea Actions workflow.** `build.yaml` builds and - pushes images but never runs `pytest`. Add a test job that gates the build. -- **No linting or type checking.** Add `ruff` (Python) and `tsc --noEmit` - (TypeScript) to CI. +- **API pagination.** The `/analyze/batch` and `/jobs` endpoints could benefit + from cursor-based pagination for large result sets. +- **Request validation improvements.** Add stricter input validation for + company names (disallow special characters, enforce length limits). --- @@ -94,23 +139,20 @@ Improvements to usability, performance, and developer experience. Lower-urgency enhancements and future features. -- **Export analysis reports.** Allow users to download analysis results as PDF - or CSV from the dashboard. -- **Comparison view.** Side-by-side comparison of two companies' patent - portfolios. -- **Scheduled/recurring analysis.** Periodically re-analyze tracked companies - and alert on significant changes. -- **Webhook/notification support.** Send alerts (Slack, Discord, email) when - batch jobs complete or when a company's innovation score changes - significantly. -- **Multi-model support.** Let users choose between LLM providers per analysis - (e.g., GPT-4o, Gemini, Claude) and compare outputs. -- **Patent trend charts.** Visualize patent filing frequency and technology - category distribution over time in the Analytics page. -- **API pagination.** The `/analyze/batch` and `/jobs` endpoints could benefit - from cursor-based pagination for large result sets. -- **OpenAPI client generation.** Auto-generate the TypeScript API client from - the FastAPI OpenAPI spec to keep frontend types in sync. +- **Historical analysis diffing.** Show what changed between two analysis runs + for the same company, highlighting new patents and score shifts. +- **Patent classification tagging.** Automatically tag patents by technology + domain (AI, semiconductors, materials science) using LLM classification. +- **User-level API keys.** Allow users to generate personal API keys for + programmatic access without JWT token refresh. +- **Batch export.** Export analysis results for multiple companies at once as + a ZIP archive. +- **Rate limiting dashboard.** Surface rate limit status and usage statistics + in the admin panel. +- **Async webhook delivery.** Move webhook delivery to a background task queue + (e.g., Celery, arq) to avoid blocking the scheduler. +- **Multi-tenant support.** Scope analysis results and tracked companies per + user or organization. --- -- 2.52.0 From 63ca18e9bfb5bc0e20e5ba40554ffbc7bfa53743 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:20:06 +0000 Subject: [PATCH 09/15] Add S3/MinIO storage backend tests for storage.py 21 test cases covering: - S3StorageBackend: read, write, exists, path_for with mocked boto3 - Error handling: NoSuchKey exception, generic 404, non-404 re-raise - Bucket auto-creation on init and graceful handling of creation failure - Constructor credential/endpoint passthrough - LocalStorageBackend: round-trip read/write, missing file, empty file - get_storage_backend() factory: local/s3 selection, case-insensitivity Closes leeworks-agents/SPARC#1660 Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_storage.py | 263 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 tests/test_storage.py diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 0000000..ba75be8 --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,263 @@ +"""Tests for S3/MinIO storage backend in storage.py. + +Covers issue #1660: +- S3StorageBackend read, write, exists, path_for +- Error handling: NoSuchKey, generic S3 errors, bucket auto-creation +- get_storage_backend() factory function +- LocalStorageBackend (basic sanity checks) +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from SPARC.storage import LocalStorageBackend, S3StorageBackend, get_storage_backend + + +# ---------- S3StorageBackend ---------- + +class TestS3StorageBackend: + """Tests for the S3-compatible storage backend.""" + + @pytest.fixture + def s3_backend(self): + """Create an S3StorageBackend with a fully mocked boto3 client.""" + with patch.dict("sys.modules", {"boto3": MagicMock()}): + import boto3 as mock_boto + mock_s3 = MagicMock() + mock_boto.client.return_value = mock_s3 + mock_s3.head_bucket.return_value = {} + + backend = S3StorageBackend( + bucket="test-bucket", + endpoint_url="http://minio:9000", + access_key="minioadmin", + secret_key="minioadmin", + ) + # Expose mock for assertions + backend._mock_s3 = mock_s3 + yield backend + + def test_write_puts_object(self, s3_backend): + """write() calls put_object with correct bucket, key, and body.""" + s3_backend.write("US-12345678-B2.pdf", b"PDF content here") + + s3_backend._mock_s3.put_object.assert_called_once_with( + Bucket="test-bucket", + Key="US-12345678-B2.pdf", + Body=b"PDF content here", + ContentType="application/pdf", + ) + + def test_read_returns_body(self, s3_backend): + """read() returns the Body content from get_object.""" + mock_body = MagicMock() + mock_body.read.return_value = b"PDF data" + s3_backend._mock_s3.get_object.return_value = {"Body": mock_body} + + result = s3_backend.read("US-12345678-B2.pdf") + + assert result == b"PDF data" + s3_backend._mock_s3.get_object.assert_called_once_with( + Bucket="test-bucket", + Key="US-12345678-B2.pdf", + ) + + def test_read_nosuchkey_raises_file_not_found(self, s3_backend): + """read() raises FileNotFoundError when object does not exist.""" + # Create a NoSuchKey exception class on the mock + nosuchkey = type("NoSuchKey", (Exception,), {}) + s3_backend._mock_s3.exceptions.NoSuchKey = nosuchkey + s3_backend._mock_s3.get_object.side_effect = nosuchkey("not found") + + # Reassign s3 to trigger the except branch + s3_backend.s3 = s3_backend._mock_s3 + + with pytest.raises(FileNotFoundError, match="S3 object not found"): + s3_backend.read("missing.pdf") + + def test_read_generic_404_raises_file_not_found(self, s3_backend): + """read() handles generic 404 errors from S3-compatible APIs.""" + nosuchkey = type("NoSuchKey", (Exception,), {}) + s3_backend._mock_s3.exceptions.NoSuchKey = nosuchkey + s3_backend.s3 = s3_backend._mock_s3 + s3_backend.s3.get_object.side_effect = Exception("An error occurred (404)") + + with pytest.raises(FileNotFoundError, match="S3 object not found"): + s3_backend.read("missing.pdf") + + def test_read_other_error_re_raises(self, s3_backend): + """read() re-raises non-404 errors.""" + nosuchkey = type("NoSuchKey", (Exception,), {}) + s3_backend._mock_s3.exceptions.NoSuchKey = nosuchkey + s3_backend.s3 = s3_backend._mock_s3 + s3_backend.s3.get_object.side_effect = Exception("Internal server error") + + with pytest.raises(Exception, match="Internal server error"): + s3_backend.read("some-file.pdf") + + def test_exists_returns_true_for_existing_object(self, s3_backend): + """exists() returns True when head_object succeeds with content.""" + s3_backend._mock_s3.head_object.return_value = {"ContentLength": 1024} + + assert s3_backend.exists("US-12345678-B2.pdf") is True + + def test_exists_returns_false_for_missing_object(self, s3_backend): + """exists() returns False when head_object raises an exception.""" + s3_backend._mock_s3.head_object.side_effect = Exception("Not Found") + + assert s3_backend.exists("missing.pdf") is False + + def test_exists_returns_false_for_zero_length(self, s3_backend): + """exists() returns False when object has zero content length.""" + s3_backend._mock_s3.head_object.return_value = {"ContentLength": 0} + + assert s3_backend.exists("empty.pdf") is False + + def test_path_for_returns_s3_uri(self, s3_backend): + """path_for() returns an s3:// URI.""" + path = s3_backend.path_for("US-12345678-B2.pdf") + + assert path == "s3://test-bucket/US-12345678-B2.pdf" + + def test_constructor_creates_bucket_if_missing(self): + """Constructor creates the bucket if head_bucket fails.""" + with patch.dict("sys.modules", {"boto3": MagicMock()}): + import boto3 as mock_boto + mock_s3 = MagicMock() + mock_boto.client.return_value = mock_s3 + mock_s3.head_bucket.side_effect = Exception("Bucket not found") + + S3StorageBackend( + bucket="new-bucket", + endpoint_url="http://minio:9000", + access_key="admin", + secret_key="admin", + ) + + mock_s3.create_bucket.assert_called_once_with(Bucket="new-bucket") + + def test_constructor_handles_bucket_creation_failure(self): + """Constructor logs warning but does not crash if bucket creation fails.""" + with patch.dict("sys.modules", {"boto3": MagicMock()}): + import boto3 as mock_boto + mock_s3 = MagicMock() + mock_boto.client.return_value = mock_s3 + mock_s3.head_bucket.side_effect = Exception("Bucket not found") + mock_s3.create_bucket.side_effect = Exception("Permission denied") + + # Should not raise + backend = S3StorageBackend( + bucket="locked-bucket", + endpoint_url="http://minio:9000", + access_key="admin", + secret_key="admin", + ) + assert backend.bucket == "locked-bucket" + + def test_constructor_passes_endpoint_and_credentials(self): + """Constructor passes endpoint_url and credentials to boto3.client.""" + with patch.dict("sys.modules", {"boto3": MagicMock()}): + import boto3 as mock_boto + mock_s3 = MagicMock() + mock_boto.client.return_value = mock_s3 + + S3StorageBackend( + bucket="test", + endpoint_url="http://minio:9000", + access_key="mykey", + secret_key="mysecret", + ) + + mock_boto.client.assert_called_with( + "s3", + endpoint_url="http://minio:9000", + aws_access_key_id="mykey", + aws_secret_access_key="mysecret", + ) + + +# ---------- LocalStorageBackend ---------- + +class TestLocalStorageBackend: + """Basic sanity checks for the local filesystem backend.""" + + def test_write_and_read(self, tmp_path): + """Write and read round-trip produces identical content.""" + backend = LocalStorageBackend(base_dir=str(tmp_path)) + backend.write("test.pdf", b"hello world") + + result = backend.read("test.pdf") + assert result == b"hello world" + + def test_read_missing_file_raises(self, tmp_path): + """Reading a non-existent file raises FileNotFoundError.""" + backend = LocalStorageBackend(base_dir=str(tmp_path)) + + with pytest.raises(FileNotFoundError): + backend.read("nonexistent.pdf") + + def test_exists_true_for_written_file(self, tmp_path): + """exists() returns True after writing a file.""" + backend = LocalStorageBackend(base_dir=str(tmp_path)) + backend.write("test.pdf", b"data") + + assert backend.exists("test.pdf") is True + + def test_exists_false_for_missing_file(self, tmp_path): + """exists() returns False for non-existent file.""" + backend = LocalStorageBackend(base_dir=str(tmp_path)) + + assert backend.exists("missing.pdf") is False + + def test_exists_false_for_empty_file(self, tmp_path): + """exists() returns False for zero-length file.""" + backend = LocalStorageBackend(base_dir=str(tmp_path)) + backend.write("empty.pdf", b"") + + assert backend.exists("empty.pdf") is False + + def test_path_for_returns_full_path(self, tmp_path): + """path_for() returns the full filesystem path.""" + backend = LocalStorageBackend(base_dir=str(tmp_path)) + path = backend.path_for("test.pdf") + + assert path == str(tmp_path / "test.pdf") + + +# ---------- get_storage_backend() factory ---------- + +class TestGetStorageBackend: + """Tests for the storage backend factory function.""" + + @patch("SPARC.storage.config") + def test_returns_local_backend_by_default(self, mock_config): + """Default config returns LocalStorageBackend.""" + mock_config.storage_backend = "local" + + backend = get_storage_backend() + + assert isinstance(backend, LocalStorageBackend) + + @patch("SPARC.storage.config") + def test_returns_s3_backend_when_configured(self, mock_config): + """Setting storage_backend=s3 returns S3StorageBackend.""" + mock_config.storage_backend = "s3" + mock_config.s3_bucket = "test-bucket" + mock_config.s3_endpoint_url = "http://minio:9000" + mock_config.s3_access_key = "key" + mock_config.s3_secret_key = "secret" + + with patch.dict("sys.modules", {"boto3": MagicMock()}): + backend = get_storage_backend() + + assert isinstance(backend, S3StorageBackend) + + @patch("SPARC.storage.config") + def test_case_insensitive_backend_selection(self, mock_config): + """Backend selection is case-insensitive.""" + mock_config.storage_backend = "LOCAL" + + backend = get_storage_backend() + + assert isinstance(backend, LocalStorageBackend) -- 2.52.0 From a2f81b0396c3ea606c4b61315d79d464ff0a9016 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 19:21:53 +0000 Subject: [PATCH 10/15] Add test coverage for analyze_single_patent auto-download path 7 test cases covering: - PDF on disk analyzed directly (no download) - Auto-download from cached metadata link when PDF missing - FileNotFoundError when no cached link available - Cached patent without pdf_link raises FileNotFoundError - Analysis pipeline failure returns error string gracefully - Model override parameter forwarded to LLM - FileNotFoundError during parsing re-raised (not swallowed) Closes leeworks-agents/SPARC#1661 Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_analyze_single_patent.py | 211 ++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 tests/test_analyze_single_patent.py diff --git a/tests/test_analyze_single_patent.py b/tests/test_analyze_single_patent.py new file mode 100644 index 0000000..3b2283b --- /dev/null +++ b/tests/test_analyze_single_patent.py @@ -0,0 +1,211 @@ +"""Tests for analyze_single_patent auto-download path. + +Covers issue #1661: +- PDF exists on disk: direct analysis (happy path) +- PDF not on disk, cached link exists: auto-download and analyze +- PDF not on disk, no cached link: FileNotFoundError +- Analysis failure after PDF found: graceful error message +- Model override parameter passthrough +""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + +from SPARC.analyzer import CompanyAnalyzer +from SPARC.types import Patent + + +@pytest.fixture(autouse=True) +def mock_db(mocker): + """Mock DatabaseClient so no real DB is needed.""" + mock_db_cls = mocker.patch("SPARC.analyzer.DatabaseClient") + mock_db_instance = MagicMock() + mock_db_instance.get_cached_patent.return_value = None + mock_db_instance.get_cached_serp_query.return_value = None + mock_db_cls.return_value = mock_db_instance + return mock_db_instance + + +@pytest.fixture +def analyzer(mocker, mock_db): + """Create a CompanyAnalyzer with mocked LLM and DB.""" + mocker.patch("SPARC.analyzer.LLMAnalyzer") + return CompanyAnalyzer(openrouter_api_key="test-key") + + +class TestAnalyzeSinglePatentAutoDownload: + """Test the auto-download logic in analyze_single_patent.""" + + def test_pdf_on_disk_analyzed_directly(self, analyzer, mocker, tmp_path): + """When PDF exists on disk, it is analyzed directly without download.""" + patent_id = "US-11234567-B2" + + # Create the patents dir and PDF file + patents_dir = tmp_path / "patents" + patents_dir.mkdir() + pdf_path = patents_dir / f"{patent_id}.pdf" + pdf_path.write_bytes(b"fake PDF content") + + mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf") + mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm") + mock_parse.return_value = {"abstract": "test", "claims": "test claims"} + mock_minimize.return_value = "minimized content" + analyzer.llm_analyzer.analyze_patent_content.return_value = "Good patent." + + # Change cwd so patents/{patent_id}.pdf resolves to our tmp_path + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + result = analyzer.analyze_single_patent(patent_id, "TestCo") + finally: + os.chdir(original_cwd) + + assert result == "Good patent." + # DB cache should not have been queried since file existed + analyzer.db.get_cached_patent.assert_not_called() + + def test_auto_download_from_cached_link(self, analyzer, mocker, tmp_path): + """When PDF is not on disk but link is cached, auto-download occurs.""" + patent_id = "US-99887766-A1" + + # No patents dir exists (PDF not on disk) + mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents") + downloaded_patent = Patent(patent_id=patent_id, pdf_link="https://example.com/patent.pdf") + downloaded_patent.pdf_path = f"patents/{patent_id}.pdf" + mock_save.return_value = downloaded_patent + + # Cached patent has a PDF link + analyzer.db.get_cached_patent.return_value = { + "patent_id": patent_id, + "pdf_link": "https://example.com/patent.pdf", + } + + # Mock the rest of the analysis pipeline + mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf") + mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm") + mock_parse.return_value = {"abstract": "test abstract"} + mock_minimize.return_value = "minimized content" + analyzer.llm_analyzer.analyze_patent_content.return_value = "Strong innovation." + + # Change cwd so patents/{patent_id}.pdf does NOT exist + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + result = analyzer.analyze_single_patent(patent_id, "DownloadCo") + finally: + os.chdir(original_cwd) + + assert result == "Strong innovation." + analyzer.db.get_cached_patent.assert_called_once_with(patent_id) + mock_save.assert_called_once() + # Verify the Patent passed to save_patents has the correct ID and link + saved_patent = mock_save.call_args[0][0] + assert saved_patent.patent_id == patent_id + assert saved_patent.pdf_link == "https://example.com/patent.pdf" + + def test_no_cached_link_raises_file_not_found(self, analyzer, mocker, tmp_path): + """When PDF is not on disk and no cached link, FileNotFoundError raised.""" + patent_id = "US-00000000-X1" + + analyzer.db.get_cached_patent.return_value = None + + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + with pytest.raises(FileNotFoundError, match="no download link is cached"): + analyzer.analyze_single_patent(patent_id, "MissingCo") + finally: + os.chdir(original_cwd) + + def test_cached_patent_without_pdf_link_raises(self, analyzer, mocker, tmp_path): + """When cached patent exists but has no pdf_link, FileNotFoundError raised.""" + patent_id = "US-11111111-B1" + + analyzer.db.get_cached_patent.return_value = { + "patent_id": patent_id, + "pdf_link": None, + } + + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + with pytest.raises(FileNotFoundError, match="no download link is cached"): + analyzer.analyze_single_patent(patent_id, "NoPDFCo") + finally: + os.chdir(original_cwd) + + def test_analysis_exception_returns_error_message(self, analyzer, mocker, tmp_path): + """When analysis pipeline fails, returns error string instead of raising.""" + patent_id = "US-22222222-A2" + + # Create the PDF on disk so it skips download + patents_dir = tmp_path / "patents" + patents_dir.mkdir() + (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF") + + # Parse fails + mocker.patch( + "SPARC.analyzer.SERP.parse_patent_pdf", + side_effect=ValueError("Corrupt PDF"), + ) + + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + result = analyzer.analyze_single_patent(patent_id, "ErrorCo") + finally: + os.chdir(original_cwd) + + assert "Failed to analyze patent" in result + assert "Corrupt PDF" in result + + def test_model_override_passed_to_llm(self, analyzer, mocker, tmp_path): + """The model parameter is forwarded to the LLM analyzer.""" + patent_id = "US-33333333-B2" + + patents_dir = tmp_path / "patents" + patents_dir.mkdir() + (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF") + + mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "test"}) + mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="content") + analyzer.llm_analyzer.analyze_patent_content.return_value = "Analysis result." + + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + result = analyzer.analyze_single_patent( + patent_id, "ModelCo", model="openai/gpt-4o" + ) + finally: + os.chdir(original_cwd) + + assert result == "Analysis result." + analyzer.llm_analyzer.analyze_patent_content.assert_called_once_with( + patent_content="content", + company_name="ModelCo", + model="openai/gpt-4o", + ) + + def test_file_not_found_during_parse_re_raised(self, analyzer, mocker, tmp_path): + """FileNotFoundError during parsing is re-raised, not caught.""" + patent_id = "US-44444444-C1" + + patents_dir = tmp_path / "patents" + patents_dir.mkdir() + (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF") + + mocker.patch( + "SPARC.analyzer.SERP.parse_patent_pdf", + side_effect=FileNotFoundError("PDF file vanished"), + ) + + original_cwd = os.getcwd() + os.chdir(tmp_path) + try: + with pytest.raises(FileNotFoundError, match="PDF file vanished"): + analyzer.analyze_single_patent(patent_id, "VanishCo") + finally: + os.chdir(original_cwd) -- 2.52.0 From 6165d66760cc69d16da25ca372c6de77f8c1c589 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 20 Apr 2026 23:05:42 +0000 Subject: [PATCH 11/15] Fix scheduler tests to use get_db_client after scheduler refactor The scheduler was refactored (PR #1665) to use the pooled get_db_client() from SPARC.auth instead of creating its own DatabaseClient. Update test mocks accordingly and remove the db.close() assertion since the pooled client is no longer closed by the scheduler. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_tracked_companies.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test_tracked_companies.py b/tests/test_tracked_companies.py index d1e96fa..df25134 100644 --- a/tests/test_tracked_companies.py +++ b/tests/test_tracked_companies.py @@ -272,7 +272,7 @@ class TestSchedulerIntegration: mock_db = MagicMock() mock_db.list_tracked_companies.return_value = [] - with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + with patch("SPARC.scheduler.get_db_client", return_value=mock_db), \ patch("SPARC.scheduler.CompanyAnalyzer") as mock_analyzer_cls: from SPARC.scheduler import run_scheduled_analysis run_scheduled_analysis() @@ -292,7 +292,7 @@ class TestSchedulerIntegration: mock_analyzer = MagicMock() mock_analyzer._analyze_company_safe.side_effect = [mock_result_nvidia, mock_result_amd] - with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + with patch("SPARC.scheduler.get_db_client", return_value=mock_db), \ patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): from SPARC.scheduler import run_scheduled_analysis run_scheduled_analysis() @@ -312,7 +312,7 @@ class TestSchedulerIntegration: mock_analyzer = MagicMock() mock_analyzer._analyze_company_safe.return_value = mock_result - with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + with patch("SPARC.scheduler.get_db_client", return_value=mock_db), \ patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): from SPARC.scheduler import run_scheduled_analysis run_scheduled_analysis() @@ -335,7 +335,7 @@ class TestSchedulerIntegration: mock_analyzer = MagicMock() mock_analyzer._analyze_company_safe.return_value = mock_result - with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + with patch("SPARC.scheduler.get_db_client", return_value=mock_db), \ patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): from SPARC.scheduler import run_scheduled_analysis run_scheduled_analysis() @@ -355,7 +355,7 @@ class TestSchedulerIntegration: mock_analyzer = MagicMock() mock_analyzer._analyze_company_safe.side_effect = [mock_fail_result, mock_ok_result] - with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + with patch("SPARC.scheduler.get_db_client", return_value=mock_db), \ patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): from SPARC.scheduler import run_scheduled_analysis run_scheduled_analysis() @@ -378,11 +378,10 @@ class TestSchedulerIntegration: mock_ok_result, ] - with patch("SPARC.scheduler.DatabaseClient", return_value=mock_db), \ + with patch("SPARC.scheduler.get_db_client", return_value=mock_db), \ patch("SPARC.scheduler.CompanyAnalyzer", return_value=mock_analyzer): from SPARC.scheduler import run_scheduled_analysis run_scheduled_analysis() # OKCo should still be processed mock_db.update_tracked_company.assert_called_once_with("OKCo", 22) - mock_db.close.assert_called_once() -- 2.52.0 From 7c6eed8d72c2a18cb53b6a5e898782df57358f19 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 18 May 2026 21:29:14 +0000 Subject: [PATCH 12/15] Update ROADMAP.md to mark completed P1 and P2 items as done Move seven completed items from the P1 and P2 sections into the Completed section: in-memory jobs persistence, export endpoint tests, tracked company admin tests, webhook integration tests, S3 storage tests, auto-download path tests, and scheduler DatabaseClient refactor. The P2 section now only lists the two genuinely open items: cursor-based pagination (Issue #1669) and request validation (Issue #1670). Closes leeworks-agents/SPARC#1678 Co-Authored-By: Claude Opus 4.6 (1M context) --- ROADMAP.md | 67 ++++++++++++++++++++++++------------------------------ 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 5b177d9..0e86ab5 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -81,57 +81,50 @@ Items that have been implemented and merged into main. - ~~OpenAPI client generation.~~ TypeScript API client auto-generated from FastAPI spec with CI freshness check. +### Resilience + +- ~~`_jobs` dict is in-memory only.~~ Database-backed job persistence + implemented using `db.list_jobs()` and `mark_stale_jobs_failed()`. The + in-memory `_jobs` dict has been removed. + +### Test coverage (P1/P2) + +- ~~Export endpoint tests.~~ Tests added for CSV and PDF export endpoints. +- ~~Tracked company admin endpoint tests.~~ Tests added for `/admin/tracked` + CRUD endpoints and scheduler integration. +- ~~Webhook integration tests.~~ Tests added for retry logic, Slack/Discord + payload format, and multi-URL dispatch. +- ~~S3/MinIO storage backend tests.~~ Unit tests added for the S3 backend + (read, write, exists, delete, error handling). +- ~~`analyze_single_patent` auto-download path tests.~~ Tests added for the + auto-download fallback (cache lookup, PDF download, FileNotFoundError). + +### Code quality + +- ~~Scheduler creates its own DatabaseClient.~~ Refactored to use the + application-level pooled `get_db_client()`. + --- ## P1 -- High Priority -These items address correctness, reliability, and coverage gaps that should be -resolved before broader production use. - -### Resilience - -- **`_jobs` dict is in-memory only.** Job state is lost on API restart. - Persist job status in PostgreSQL or Redis so async batch results survive - restarts. - -### Test coverage gaps - -- **Export endpoint tests.** The CSV and PDF export endpoints (`/export/`) - lack test coverage. Add tests covering auth, success, 404, and edge cases. - *(Issue #1655)* -- **Tracked company admin endpoint tests.** The `/admin/tracked` CRUD - endpoints and scheduler integration lack test coverage. *(Issue #1656)* +No outstanding P1 items. All previously listed items have been completed and +moved to the Completed section above. --- ## P2 -- Medium Priority -Improvements to reliability, test coverage, and code quality. - -### Test coverage - -- **Webhook integration tests.** The retry logic, Slack/Discord payload - format, and multi-URL dispatch in `webhooks.py` need test coverage. - *(Issue #1657)* -- **S3/MinIO storage backend tests.** `storage.py` has local filesystem tests - but no unit tests for the S3 backend (read, write, exists, delete, - error handling). *(Issue #1660)* -- **`analyze_single_patent` auto-download path tests.** The auto-download - fallback (cache lookup, PDF download, FileNotFoundError) in - `analyzer.py` lacks test coverage. *(Issue #1661)* - -### Code quality - -- **Scheduler creates its own DatabaseClient.** `scheduler.py` bypasses the - application-level pooled client, creating a new connection on every tick. - Refactor to use `get_db_client()`. *(Issue #1658)* +Improvements to the API surface. ### API improvements -- **API pagination.** The `/analyze/batch` and `/jobs` endpoints could benefit - from cursor-based pagination for large result sets. +- **API pagination.** The `/analyze/batch` endpoint needs cursor-based + pagination for large result sets. The `/jobs` endpoint already has cursor + pagination. *(Issue #1669)* - **Request validation improvements.** Add stricter input validation for company names (disallow special characters, enforce length limits). + *(Issue #1670)* --- -- 2.52.0 From a95129904e1a2dd37d594693e69f67133661c155 Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 18 May 2026 21:38:44 +0000 Subject: [PATCH 13/15] Add stricter input validation for company names on analysis endpoints Add a CompanyName validated type enforcing 2-100 character length and allowing only alphanumeric characters, spaces, hyphens, ampersands, and periods. Applied to all endpoints accepting company names: /analyze, /analyze/patent, /analyze/batch, /admin/tracked, and /export. Includes unit tests covering too-short, too-long, special character, leading-character, and valid edge cases for both single and batch endpoints. Closes leeworks-agents/SPARC#1670 Co-Authored-By: Claude Opus 4.6 (1M context) --- SPARC/api.py | 28 +++-- tests/test_company_name_validation.py | 157 ++++++++++++++++++++++++++ 2 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 tests/test_company_name_validation.py diff --git a/SPARC/api.py b/SPARC/api.py index a42ddd7..cf053e8 100644 --- a/SPARC/api.py +++ b/SPARC/api.py @@ -12,10 +12,10 @@ from typing import TYPE_CHECKING, Annotated, List if TYPE_CHECKING: from SPARC.database import DatabaseClient -from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Query, Request +from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Path, Query, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse -from pydantic import BaseModel, EmailStr, Field +from pydantic import BaseModel, EmailStr, Field, StringConstraints from slowapi import Limiter from slowapi.errors import RateLimitExceeded from slowapi.util import get_remote_address @@ -36,6 +36,16 @@ from SPARC.auth import ( ) from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult +# Validated company name type: 2-100 chars, alphanumeric + spaces/hyphens/ampersands/periods only. +CompanyName = Annotated[ + str, + StringConstraints( + min_length=2, + max_length=100, + pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$", + ), +] + # Pydantic models for API class CompanyAnalysisResponse(BaseModel): @@ -72,7 +82,7 @@ class CompanyAnalysisRequest(BaseModel): class BatchAnalysisRequest(BaseModel): """Request model for batch company analysis.""" - companies: list[str] = Field( + companies: list[CompanyName] = Field( ..., min_length=1, max_length=20, description="List of company names to analyze" ) max_workers: int = Field( @@ -405,7 +415,7 @@ async def delete_user( class TrackCompanyRequest(BaseModel): """Request to add a company to tracking.""" - company_name: str = Field(..., min_length=1, max_length=255) + company_name: CompanyName = Field(...) @app.get("/admin/tracked", tags=["Admin"]) @@ -432,7 +442,7 @@ async def add_tracked_company( @app.delete("/admin/tracked/{company_name}", tags=["Admin"]) async def remove_tracked_company( - company_name: str, + company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], _: UserResponse = Depends(get_current_admin), ): """Remove a company from the tracked list (admin only).""" @@ -590,7 +600,7 @@ async def get_analytics_trends( @app.get("/export/{company_name}", tags=["Export"]) async def export_company_csv( - company_name: str, + company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], _: UserResponse = Depends(get_current_user), ): """Export analysis results for a company as a CSV file. @@ -642,7 +652,7 @@ async def export_company_csv( @app.get("/export/{company_name}/pdf", tags=["Export"]) async def export_company_pdf( - company_name: str, + company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], _: UserResponse = Depends(get_current_user), ): """Export analysis results for a company as a formatted PDF report. @@ -816,7 +826,7 @@ async def health_check(): tags=["Analysis"], ) async def analyze_company( - company_name: str, + company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], model: str | None = Query(default=None, description="LLM model to use (e.g. 'openai/gpt-4o'). Defaults to server config."), _: UserResponse = Depends(get_current_user), ): @@ -846,7 +856,7 @@ async def analyze_company( ) async def analyze_single_patent( patent_id: str, - company_name: str = Query(description="Company name for analysis context"), + company_name: Annotated[str, Query(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$", description="Company name for analysis context")], _: UserResponse = Depends(get_current_user), ): """Analyze a single patent by its publication ID. diff --git a/tests/test_company_name_validation.py b/tests/test_company_name_validation.py new file mode 100644 index 0000000..3e6855f --- /dev/null +++ b/tests/test_company_name_validation.py @@ -0,0 +1,157 @@ +"""Tests for company name input validation on analysis endpoints.""" + +from datetime import datetime +from unittest.mock import Mock + +import pytest +from fastapi.testclient import TestClient + +from SPARC.api import app +from SPARC.types import CompanyAnalysisResult + + +@pytest.fixture +def client(): + """Create test client.""" + return TestClient(app) + + +@pytest.fixture +def mock_analyzer(mocker): + """Mock the global analyzer so valid requests succeed.""" + mock = Mock() + mock._analyze_company_safe.return_value = CompanyAnalysisResult( + company_name="nvidia", + analysis="Test analysis", + patent_count=1, + success=True, + timestamp=datetime.now(), + ) + mocker.patch("SPARC.api._analyzer", mock) + return mock + + +class TestCompanyNameValidation: + """Test that company names are validated on analysis endpoints.""" + + # --- Too short --- + + def test_single_char_rejected(self, client, mock_analyzer): + """A one-character company name should be rejected.""" + response = client.get("/analyze/X") + assert response.status_code == 422 + + # --- Too long --- + + def test_over_100_chars_rejected(self, client, mock_analyzer): + """A company name longer than 100 characters should be rejected.""" + long_name = "A" * 101 + response = client.get(f"/analyze/{long_name}") + assert response.status_code == 422 + + # --- Special characters --- + + @pytest.mark.parametrize( + "bad_name", + [ + "nvidia!", + "intel@corp", + "test#company", + "foo$bar", + "a%b", + "x^y", + "semi;colon", + "drop'table", + 'say"hello', + "path/traversal", + "back\\slash", + "pipe|char", + "star*glob", + "question?mark", + "