Compare commits

..

1 Commits

Author SHA1 Message Date
agent-company 0e68e8c900 Add cursor-based pagination to /analyze/batch and /jobs endpoints
- Fix route ordering bug: GET /analyze/batch was shadowed by
  GET /analyze/{company_name} causing all GET requests to /analyze/batch
  to be erroneously handled as single-company analysis (503). Move
  /analyze/batch GET registration to before the {company_name} route.
- Update TypeScript schema.d.ts: add AnalysisRecord, PaginatedAnalysisResponse,
  PaginatedJobsResponse schemas; add GET /analyze/batch operation with
  cursor+limit+company_name params; update list_jobs_jobs_get to include
  cursor param and return PaginatedJobsResponse.
- Update frontend/src/api/client.ts: add listBatchAnalyses() method with
  cursor/limit support; update listJobs() to accept cursor and return
  PaginatedJobsResponse; default limit changed from 10 to 50.
- Update frontend/src/types/index.ts: export AnalysisRecord,
  PaginatedAnalysisResponse, PaginatedJobsResponse.
- Expand tests/test_pagination.py: add auth fixture so tests pass JWT
  validation; add 11 new /jobs tests covering first page, last page,
  subsequent pages, empty results, status filter, limit boundaries, cursor
  forwarding, and paginated response shape.

Closes leeworks-agents/SPARC#1684

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 15:34:18 +00:00
9 changed files with 379 additions and 447 deletions
+56 -98
View File
@@ -5,9 +5,8 @@ Provides REST API endpoints for analyzing company patent portfolios.
from __future__ import annotations
from collections import deque
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from datetime import datetime
from typing import TYPE_CHECKING, Annotated, List
if TYPE_CHECKING:
@@ -249,9 +248,6 @@ app.state.limiter = limiter
# In-memory rate limit statistics
_rate_limit_stats: dict[str, dict] = {}
# Time-series log of rejected requests (capped to last 24 h worth of entries).
_rejected_log: deque[dict] = deque(maxlen=100_000)
def _track_rate_limit_request(endpoint: str, ip: str, rejected: bool = False) -> None:
"""Record a request against a rate-limited endpoint."""
@@ -266,11 +262,6 @@ def _track_rate_limit_request(endpoint: str, ip: str, rejected: bool = False) ->
_rate_limit_stats[key]["total_requests"] += 1
if rejected:
_rate_limit_stats[key]["rejected_requests"] += 1
_rejected_log.append({
"endpoint": endpoint,
"ip": ip,
"timestamp": datetime.now(timezone.utc).isoformat(),
})
ip_stats = _rate_limit_stats[key].setdefault("by_ip", {})
if ip not in ip_stats:
ip_stats[ip] = {"total": 0, "rejected": 0}
@@ -516,12 +507,10 @@ async def get_rate_limit_stats(
"""Get rate limit status and usage statistics (admin only).
Returns current rate limit configuration and request statistics
for all rate-limited endpoints, including per-IP breakdown and
a time-series of throttled (rejected) requests in the last 24 hours.
for all rate-limited endpoints.
Returns:
Rate limit stats per endpoint, per-IP breakdown, and throttled
request history bucketed by hour.
List of rate limit stats per endpoint with total/rejected counts
"""
rate_limits_config = {
"/auth/register": {"limit": "5/minute"},
@@ -531,45 +520,14 @@ async def get_rate_limit_stats(
results = []
for endpoint, conf in rate_limits_config.items():
stats = _rate_limit_stats.get(endpoint, {})
by_ip_raw = stats.get("by_ip", {})
by_ip = [
{"ip": ip, "total": counts["total"], "rejected": counts["rejected"]}
for ip, counts in by_ip_raw.items()
]
results.append({
"endpoint": endpoint,
"limit": conf["limit"],
"total_requests": stats.get("total_requests", 0),
"rejected_requests": stats.get("rejected_requests", 0),
"by_ip": by_ip,
})
# Build hourly buckets of throttled requests for the last 24 hours
now = datetime.now(timezone.utc)
cutoff = now - timedelta(hours=24)
hourly_buckets: dict[str, int] = {}
throttled_24h = 0
for entry in _rejected_log:
ts_str = entry["timestamp"]
try:
ts = datetime.fromisoformat(ts_str)
except (ValueError, TypeError):
continue
if ts >= cutoff:
throttled_24h += 1
bucket = ts.strftime("%Y-%m-%dT%H:00:00Z")
hourly_buckets[bucket] = hourly_buckets.get(bucket, 0) + 1
throttled_over_time = [
{"timestamp": k, "count": v}
for k, v in sorted(hourly_buckets.items())
]
return {
"rate_limits": results,
"throttled_24h": throttled_24h,
"throttled_over_time": throttled_over_time,
}
return {"rate_limits": results}
@app.get("/admin/alerts", tags=["Admin"])
@@ -939,6 +897,58 @@ async def health_check():
)
@app.get(
"/analyze/batch",
response_model=PaginatedAnalysisResponse,
tags=["Analysis"],
)
async def list_analysis_results(
company_name: Annotated[
str | None,
Query(description="Filter results by company name"),
] = None,
limit: Annotated[int, Query(ge=1, le=200)] = 50,
cursor: Annotated[
str | None,
Query(description="Opaque cursor from a previous response's next_cursor field"),
] = None,
_: UserResponse = Depends(get_current_user),
):
"""List stored analysis results with cursor-based pagination.
Returns past analysis results ordered by timestamp descending. Use
``limit`` to control page size (default 50, max 200). The response
includes a ``next_cursor`` field; pass it back as the ``cursor`` query
parameter to fetch the next page. When ``next_cursor`` is ``null``,
there are no more results.
Args:
company_name: Optional filter by company name
limit: Maximum number of results to return (default 50, max 200)
cursor: Opaque pagination cursor from a previous response
Returns:
Paginated list of analysis results
"""
db = _get_job_db()
rows = db.list_analyses(company_name=company_name, limit=limit + 1, cursor=cursor)
has_next = len(rows) > limit
if has_next:
rows = rows[:limit]
items = [AnalysisRecord(**row) for row in rows]
next_cursor = None
if has_next and rows:
last = rows[-1]
ts = last["timestamp"]
ts_str = ts.isoformat() if hasattr(ts, "isoformat") else str(ts)
next_cursor = f"{ts_str}|{last['id']}"
return PaginatedAnalysisResponse(items=items, next_cursor=next_cursor)
@app.get(
"/analyze/{company_name}",
response_model=CompanyAnalysisResponse,
@@ -1001,58 +1011,6 @@ async def analyze_single_patent(
raise HTTPException(status_code=404, detail=str(e))
@app.get(
"/analyze/batch",
response_model=PaginatedAnalysisResponse,
tags=["Analysis"],
)
async def list_analysis_results(
company_name: Annotated[
str | None,
Query(description="Filter results by company name"),
] = None,
limit: Annotated[int, Query(ge=1, le=200)] = 50,
cursor: Annotated[
str | None,
Query(description="Opaque cursor from a previous response's next_cursor field"),
] = None,
_: UserResponse = Depends(get_current_user),
):
"""List stored analysis results with cursor-based pagination.
Returns past analysis results ordered by timestamp descending. Use
``limit`` to control page size (default 50, max 200). The response
includes a ``next_cursor`` field; pass it back as the ``cursor`` query
parameter to fetch the next page. When ``next_cursor`` is ``null``,
there are no more results.
Args:
company_name: Optional filter by company name
limit: Maximum number of results to return (default 50, max 200)
cursor: Opaque pagination cursor from a previous response
Returns:
Paginated list of analysis results
"""
db = _get_job_db()
rows = db.list_analyses(company_name=company_name, limit=limit + 1, cursor=cursor)
has_next = len(rows) > limit
if has_next:
rows = rows[:limit]
items = [AnalysisRecord(**row) for row in rows]
next_cursor = None
if has_next and rows:
last = rows[-1]
ts = last["timestamp"]
ts_str = ts.isoformat() if hasattr(ts, "isoformat") else str(ts)
next_cursor = f"{ts_str}|{last['id']}"
return PaginatedAnalysisResponse(items=items, next_cursor=next_cursor)
@app.post(
"/analyze/batch",
response_model=BatchAnalysisResponse,
-9
View File
@@ -11,7 +11,6 @@ import { Batch } from './pages/Batch';
import { AnalyticsPage } from './pages/Analytics';
import { About } from './pages/About';
import { AdminUsers } from './pages/AdminUsers';
import { AdminRateLimits } from './pages/AdminRateLimits';
import { Compare } from './pages/Compare';
const queryClient = new QueryClient({
@@ -57,14 +56,6 @@ function App() {
</ProtectedRoute>
}
/>
<Route
path="/admin/rate-limits"
element={
<ProtectedRoute requireAdmin>
<AdminRateLimits />
</ProtectedRoute>
}
/>
</Route>
{/* Default redirect */}
+48 -3
View File
@@ -1,5 +1,5 @@
import axios, { AxiosError, InternalAxiosRequestConfig } from 'axios';
import type { TokenResponse, User, CompanyAnalysis, BatchAnalysisResult, JobStatus, Analytics } from '../types';
import type { TokenResponse, User, CompanyAnalysis, BatchAnalysisResult, JobStatus, Analytics, PaginatedJobsResponse, PaginatedAnalysisResponse } from '../types';
const API_BASE_URL = import.meta.env.VITE_API_URL || '/api';
@@ -141,15 +141,60 @@ export const analysisApi = {
return response.data;
},
listJobs: async (status?: string, limit = 10): Promise<JobStatus[]> => {
listJobs: async (status?: string, limit = 50, cursor?: string): Promise<PaginatedJobsResponse> => {
const params = new URLSearchParams();
if (status) params.append('status', status);
params.append('limit', limit.toString());
const response = await api.get<JobStatus[]>(`/jobs?${params}`);
if (cursor) params.append('cursor', cursor);
const response = await api.get<PaginatedJobsResponse>(`/jobs?${params}`);
return response.data;
},
listBatchAnalyses: async (companyName?: string, limit = 50, cursor?: string): Promise<PaginatedAnalysisResponse> => {
const params = new URLSearchParams();
if (companyName) params.append('company_name', companyName);
params.append('limit', limit.toString());
if (cursor) params.append('cursor', cursor);
const response = await api.get<PaginatedAnalysisResponse>(`/analyze/batch?${params}`);
return response.data;
},
getCompanyHistory: async (companyName: string, limit = 20): Promise<AnalysisHistoryItem[]> => {
const response = await api.get<AnalysisHistoryItem[]>(
`/analyze/${encodeURIComponent(companyName)}/history?limit=${limit}`
);
return response.data;
},
diffAnalyses: async (companyName: string, fromId: number, toId: number): Promise<AnalysisDiff> => {
const response = await api.get<AnalysisDiff>(
`/analyze/${encodeURIComponent(companyName)}/diff?from=${fromId}&to=${toId}`
);
return response.data;
},
};
// Analysis diff types
export interface AnalysisHistoryItem {
id: number;
analysis_type: string | null;
model: string | null;
timestamp: string;
}
export interface AnalysisDiff {
company_name: string;
from_id: number;
to_id: number;
from_timestamp: string;
to_timestamp: string;
patent_count_delta: number;
added_patents: string[];
removed_patents: string[];
changed_fields: Record<string, { from: string | null; to: string | null }>;
summary: string;
}
// Export API
export const exportApi = {
exportCsv: async (companyName: string): Promise<void> => {
+96 -5
View File
@@ -222,7 +222,17 @@ export interface paths {
path?: never;
cookie?: never;
};
get?: never;
/**
* List Batch Analyses
* @description List stored analysis results with cursor-based pagination.
*
* Returns past analysis results ordered by timestamp descending. Use
* ``limit`` to control page size (default 50, max 200). The response
* includes a ``next_cursor`` field; pass it back as the ``cursor`` query
* parameter to fetch the next page. When ``next_cursor`` is ``null``,
* there are no more results.
*/
get: operations["list_batch_analyses_analyze_batch_get"];
put?: never;
/**
* Analyze Companies Batch
@@ -308,14 +318,15 @@ export interface paths {
};
/**
* List Jobs
* @description List all analysis jobs.
* @description List analysis jobs with cursor-based pagination.
*
* Args:
* status: Optional filter by job status
* limit: Maximum number of jobs to return (default 10, max 100)
* limit: Maximum number of jobs to return (default 50, max 200)
* cursor: Opaque cursor from a previous response's next_cursor field
*
* Returns:
* List of job statuses
* Paginated list of job statuses with next_cursor for subsequent pages
*/
get: operations["list_jobs_jobs_get"];
put?: never;
@@ -330,6 +341,27 @@ export interface paths {
export type webhooks = Record<string, never>;
export interface components {
schemas: {
/**
* AnalysisRecord
* @description A single stored analysis result.
*/
AnalysisRecord: {
/** Id */
id: number;
/** Company Name */
company_name?: string | null;
/** Analysis Type */
analysis_type?: string | null;
/** Model */
model?: string | null;
/** Response */
response?: string | null;
/**
* Timestamp
* Format: date-time
*/
timestamp?: string | null;
};
/**
* AnalyticsResponse
* @description Analytics response model.
@@ -425,6 +457,26 @@ export interface components {
*/
timestamp: string;
};
/**
* PaginatedAnalysisResponse
* @description Paginated response for analysis result listings.
*/
PaginatedAnalysisResponse: {
/** Items */
items: components["schemas"]["AnalysisRecord"][];
/** Next Cursor */
next_cursor?: string | null;
};
/**
* PaginatedJobsResponse
* @description Paginated response for job listings.
*/
PaginatedJobsResponse: {
/** Items */
items: components["schemas"]["JobStatus"][];
/** Next Cursor */
next_cursor?: string | null;
};
/**
* JobStatus
* @description Status of a background analysis job.
@@ -944,7 +996,10 @@ export interface operations {
query?: {
/** @description Filter by status: pending, running, completed, failed */
status?: string | null;
/** @description Maximum number of jobs to return (default 50, max 200) */
limit?: number;
/** @description Opaque cursor from a previous response's next_cursor field */
cursor?: string | null;
};
header?: never;
path?: never;
@@ -958,7 +1013,43 @@ export interface operations {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["JobStatus"][];
"application/json": components["schemas"]["PaginatedJobsResponse"];
};
};
/** @description Validation Error */
422: {
headers: {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["HTTPValidationError"];
};
};
};
};
list_batch_analyses_analyze_batch_get: {
parameters: {
query?: {
/** @description Filter results by company name */
company_name?: string | null;
/** @description Maximum number of results to return (default 50, max 200) */
limit?: number;
/** @description Opaque cursor from a previous response's next_cursor field */
cursor?: string | null;
};
header?: never;
path?: never;
cookie?: never;
};
requestBody?: never;
responses: {
/** @description Successful Response */
200: {
headers: {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["PaginatedAnalysisResponse"];
};
};
/** @description Validation Error */
+1 -2
View File
@@ -1,7 +1,7 @@
import { Outlet, NavLink, useNavigate } from 'react-router-dom';
import { useAuth } from '../context/AuthContext';
import { useTheme } from '../context/ThemeContext';
import { Search, Layers, BarChart3, Info, Users, LogOut, GitCompareArrows, Sun, Moon, ShieldAlert } from 'lucide-react';
import { Search, Layers, BarChart3, Info, Users, LogOut, GitCompareArrows, Sun, Moon } from 'lucide-react';
export function Layout() {
const { user, isAdmin, logout } = useAuth();
@@ -23,7 +23,6 @@ export function Layout() {
if (isAdmin) {
navItems.push({ to: '/admin/users', icon: Users, label: 'Users' });
navItems.push({ to: '/admin/rate-limits', icon: ShieldAlert, label: 'Rate Limits' });
}
return (
-240
View File
@@ -1,240 +0,0 @@
import { useState } from 'react';
import { useQuery } from '@tanstack/react-query';
import { adminApi } from '../api/client';
import type { RateLimitStatsResponse } from '../api/client';
import { ShieldAlert, Activity, AlertCircle, RefreshCw, Clock } from 'lucide-react';
const REFRESH_OPTIONS = [
{ label: '15s', value: 15_000 },
{ label: '30s', value: 30_000 },
{ label: '1m', value: 60_000 },
{ label: 'Off', value: 0 },
];
export function AdminRateLimits() {
const [refreshInterval, setRefreshInterval] = useState(30_000);
const { data, isLoading, isError, dataUpdatedAt } = useQuery<RateLimitStatsResponse>({
queryKey: ['admin-rate-limits'],
queryFn: () => adminApi.getRateLimits(),
refetchInterval: refreshInterval || false,
});
if (isLoading) {
return (
<div className="flex items-center justify-center min-h-[400px]">
<div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-primary"></div>
</div>
);
}
if (isError) {
return (
<div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
<AlertCircle size={18} />
<span>Failed to load rate limit statistics.</span>
</div>
);
}
const maxThrottledCount = data?.throttled_over_time?.length
? Math.max(...data.throttled_over_time.map((b) => b.count))
: 0;
return (
<div className="space-y-6">
{/* Header */}
<div className="flex items-center justify-between flex-wrap gap-4">
<div>
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
Rate Limiting Dashboard
</h2>
<p className="text-text-secondary">Monitor API rate limits and throttled requests.</p>
</div>
<div className="flex items-center gap-3">
{/* Last updated */}
{dataUpdatedAt > 0 && (
<span className="text-xs text-text-secondary flex items-center gap-1">
<Clock size={12} />
Updated {new Date(dataUpdatedAt).toLocaleTimeString()}
</span>
)}
{/* Refresh interval selector */}
<div className="flex items-center gap-1 bg-bg-card/60 border border-primary/15 rounded-xl p-1">
<RefreshCw size={14} className="text-text-secondary ml-2" />
{REFRESH_OPTIONS.map((opt) => (
<button
key={opt.value}
onClick={() => setRefreshInterval(opt.value)}
className={`px-3 py-1 rounded-lg text-xs font-medium transition-all ${
refreshInterval === opt.value
? 'bg-primary text-white'
: 'text-text-secondary hover:text-text-primary hover:bg-bg-card-hover'
}`}
>
{opt.label}
</button>
))}
</div>
</div>
</div>
{/* Summary cards */}
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-5">
<div className="flex items-center gap-2 mb-2">
<Activity size={18} className="text-primary" />
<span className="text-sm font-semibold text-text-secondary uppercase tracking-wider">
Total Requests
</span>
</div>
<div className="text-3xl font-bold text-text-primary">
{data?.rate_limits.reduce((sum, rl) => sum + rl.total_requests, 0) ?? 0}
</div>
</div>
<div className="bg-bg-card/60 border border-error/15 rounded-2xl p-5">
<div className="flex items-center gap-2 mb-2">
<ShieldAlert size={18} className="text-error" />
<span className="text-sm font-semibold text-text-secondary uppercase tracking-wider">
Throttled (24h)
</span>
</div>
<div className="text-3xl font-bold text-error">
{data?.throttled_24h ?? 0}
</div>
</div>
<div className="bg-bg-card/60 border border-secondary/15 rounded-2xl p-5">
<div className="flex items-center gap-2 mb-2">
<ShieldAlert size={18} className="text-secondary" />
<span className="text-sm font-semibold text-text-secondary uppercase tracking-wider">
Rate-Limited Endpoints
</span>
</div>
<div className="text-3xl font-bold text-text-primary">
{data?.rate_limits.length ?? 0}
</div>
</div>
</div>
{/* Throttled over time chart (simple bar chart) */}
{data?.throttled_over_time && data.throttled_over_time.length > 0 && (
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-5">
<h3 className="text-sm font-semibold text-text-secondary uppercase tracking-wider mb-4">
Throttled Requests Over Time (Last 24h)
</h3>
<div className="flex items-end gap-1 h-32">
{data.throttled_over_time.map((bucket) => {
const height = maxThrottledCount > 0 ? (bucket.count / maxThrottledCount) * 100 : 0;
const hour = new Date(bucket.timestamp).getHours();
return (
<div key={bucket.timestamp} className="flex-1 flex flex-col items-center gap-1">
<span className="text-xs text-text-secondary">{bucket.count}</span>
<div
className="w-full bg-error/70 rounded-t-sm min-h-[2px] transition-all"
style={{ height: `${Math.max(height, 2)}%` }}
title={`${bucket.timestamp}: ${bucket.count} throttled`}
/>
<span className="text-[10px] text-text-secondary">{hour}:00</span>
</div>
);
})}
</div>
</div>
)}
{/* Per-endpoint table */}
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl overflow-hidden">
<div className="overflow-x-auto">
<table className="w-full">
<thead>
<tr className="border-b border-primary/10">
<th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Endpoint
</th>
<th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Limit
</th>
<th className="text-right px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Total Requests
</th>
<th className="text-right px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Rejected
</th>
</tr>
</thead>
<tbody className="divide-y divide-primary/10">
{data?.rate_limits.map((rl) => (
<tr key={rl.endpoint} className="hover:bg-bg-card-hover/50 transition-colors">
<td className="px-6 py-4 font-mono text-sm text-text-primary">{rl.endpoint}</td>
<td className="px-6 py-4">
<span className="inline-flex px-2 py-0.5 rounded-full text-xs font-medium bg-primary/10 text-primary border border-primary/20">
{rl.limit}
</span>
</td>
<td className="px-6 py-4 text-right text-text-primary font-semibold">
{rl.total_requests}
</td>
<td className="px-6 py-4 text-right">
<span className={rl.rejected_requests > 0 ? 'text-error font-semibold' : 'text-text-secondary'}>
{rl.rejected_requests}
</span>
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
{/* Per-IP breakdown */}
{data?.rate_limits.some((rl) => rl.by_ip.length > 0) && (
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl overflow-hidden">
<div className="px-6 py-4 border-b border-primary/10">
<h3 className="text-sm font-semibold text-text-secondary uppercase tracking-wider">
Per-IP Breakdown
</h3>
</div>
<div className="overflow-x-auto">
<table className="w-full">
<thead>
<tr className="border-b border-primary/10">
<th className="text-left px-6 py-3 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Endpoint
</th>
<th className="text-left px-6 py-3 text-sm font-semibold text-text-secondary uppercase tracking-wider">
IP Address
</th>
<th className="text-right px-6 py-3 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Total
</th>
<th className="text-right px-6 py-3 text-sm font-semibold text-text-secondary uppercase tracking-wider">
Rejected
</th>
</tr>
</thead>
<tbody className="divide-y divide-primary/10">
{data.rate_limits.flatMap((rl) =>
rl.by_ip.map((ipEntry) => (
<tr
key={`${rl.endpoint}-${ipEntry.ip}`}
className="hover:bg-bg-card-hover/50 transition-colors"
>
<td className="px-6 py-3 font-mono text-sm text-text-primary">{rl.endpoint}</td>
<td className="px-6 py-3 font-mono text-sm text-text-secondary">{ipEntry.ip}</td>
<td className="px-6 py-3 text-right text-text-primary">{ipEntry.total}</td>
<td className="px-6 py-3 text-right">
<span className={ipEntry.rejected > 0 ? 'text-error font-semibold' : 'text-text-secondary'}>
{ipEntry.rejected}
</span>
</td>
</tr>
))
)}
</tbody>
</table>
</div>
</div>
)}
</div>
);
}
+5
View File
@@ -30,3 +30,8 @@ export type HealthResponse = components['schemas']['HealthResponse'];
export type BatchAnalysisRequest = components['schemas']['BatchAnalysisRequest'];
export type ValidationError = components['schemas']['ValidationError'];
export type HTTPValidationError = components['schemas']['HTTPValidationError'];
// Pagination types
export type AnalysisRecord = components['schemas']['AnalysisRecord'];
export type PaginatedAnalysisResponse = components['schemas']['PaginatedAnalysisResponse'];
export type PaginatedJobsResponse = components['schemas']['PaginatedJobsResponse'];
+171 -19
View File
@@ -1,12 +1,13 @@
"""Tests for cursor-based pagination on /analyze/batch GET and /jobs endpoints."""
from datetime import datetime, timedelta
from unittest.mock import Mock, patch
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, Mock, patch
import pytest
from fastapi.testclient import TestClient
from SPARC.api import app
from SPARC.auth import create_access_token
@pytest.fixture
@@ -15,6 +16,27 @@ def client():
return TestClient(app)
@pytest.fixture(autouse=True)
def mock_auth_db():
"""Mock the auth DB so JWT token validation succeeds without a real database."""
db = MagicMock()
db.get_user_by_id.return_value = {
"id": 1,
"email": "user@test.com",
"role": "user",
"created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
}
with patch("SPARC.api.get_db_client", return_value=db), \
patch("SPARC.auth.get_db_client", return_value=db):
yield db
def _auth_header():
"""Create a Bearer auth header for a regular user."""
token = create_access_token(1, "user@test.com", "user")
return {"Authorization": f"Bearer {token}"}
def _make_analysis_row(id_: int, minutes_ago: int = 0, company: str = "nvidia"):
"""Create a fake analysis row dict."""
ts = datetime.now() - timedelta(minutes=minutes_ago)
@@ -56,7 +78,7 @@ class TestAnalyzeBatchGetPagination:
]
mock_get_db.return_value = db
response = client.get("/analyze/batch?limit=10")
response = client.get("/analyze/batch?limit=10", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert len(data["items"]) == 2
@@ -71,7 +93,7 @@ class TestAnalyzeBatchGetPagination:
db.list_analyses.return_value = rows
mock_get_db.return_value = db
response = client.get("/analyze/batch?limit=3")
response = client.get("/analyze/batch?limit=3", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert len(data["items"]) == 3
@@ -84,11 +106,14 @@ class TestAnalyzeBatchGetPagination:
db.list_analyses.return_value = []
mock_get_db.return_value = db
client.get("/analyze/batch?cursor=2025-01-01T00:00:00|42")
client.get("/analyze/batch?cursor=2025-01-01T00:00:00|42", headers=_auth_header())
db.list_analyses.assert_called_once()
call_kwargs = db.list_analyses.call_args
assert call_kwargs.kwargs.get("cursor") == "2025-01-01T00:00:00|42" or \
(call_kwargs[1].get("cursor") == "2025-01-01T00:00:00|42" if len(call_kwargs) > 1 else False)
cursor_val = (
call_kwargs.kwargs.get("cursor")
or (call_kwargs[1].get("cursor") if len(call_kwargs) > 1 else None)
)
assert cursor_val == "2025-01-01T00:00:00|42"
@patch("SPARC.api._get_job_db")
def test_default_limit_is_50(self, mock_get_db, client):
@@ -97,19 +122,19 @@ class TestAnalyzeBatchGetPagination:
db.list_analyses.return_value = []
mock_get_db.return_value = db
client.get("/analyze/batch")
client.get("/analyze/batch", headers=_auth_header())
call_kwargs = db.list_analyses.call_args
# The endpoint requests limit+1 from DB, so 51
assert 51 in call_kwargs.args or call_kwargs.kwargs.get("limit") == 51
def test_limit_over_200_rejected(self, client):
"""Limit > 200 should be rejected with 422."""
response = client.get("/analyze/batch?limit=201")
response = client.get("/analyze/batch?limit=201", headers=_auth_header())
assert response.status_code == 422
def test_limit_zero_rejected(self, client):
"""Limit < 1 should be rejected with 422."""
response = client.get("/analyze/batch?limit=0")
response = client.get("/analyze/batch?limit=0", headers=_auth_header())
assert response.status_code == 422
@patch("SPARC.api._get_job_db")
@@ -119,10 +144,13 @@ class TestAnalyzeBatchGetPagination:
db.list_analyses.return_value = []
mock_get_db.return_value = db
client.get("/analyze/batch?company_name=intel")
client.get("/analyze/batch?company_name=intel", headers=_auth_header())
call_kwargs = db.list_analyses.call_args
assert call_kwargs.kwargs.get("company_name") == "intel" or \
"intel" in (call_kwargs.args if call_kwargs.args else [])
company_val = (
call_kwargs.kwargs.get("company_name")
or (call_kwargs[1].get("company_name") if len(call_kwargs) > 1 else None)
)
assert company_val == "intel"
@patch("SPARC.api._get_job_db")
def test_empty_result_set(self, mock_get_db, client):
@@ -131,15 +159,30 @@ class TestAnalyzeBatchGetPagination:
db.list_analyses.return_value = []
mock_get_db.return_value = db
response = client.get("/analyze/batch")
response = client.get("/analyze/batch", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert data["items"] == []
assert data["next_cursor"] is None
@patch("SPARC.api._get_job_db")
def test_subsequent_page_uses_cursor(self, mock_get_db, client):
"""Passing a cursor should retrieve the next page of results."""
db = Mock()
db.list_analyses.return_value = [_make_analysis_row(99, minutes_ago=100)]
mock_get_db.return_value = db
class TestJobsPaginationDefaults:
"""Test that /jobs endpoint uses updated defaults."""
cursor = "2025-06-01T12:00:00|50"
response = client.get(f"/analyze/batch?limit=10&cursor={cursor}", headers=_auth_header())
assert response.status_code == 200
data = response.json()
# Only one item returned → last page → no next cursor
assert len(data["items"]) == 1
assert data["next_cursor"] is None
class TestJobsPagination:
"""Test cursor-based pagination on GET /jobs."""
@patch("SPARC.api._get_job_db")
def test_default_limit_is_50(self, mock_get_db, client):
@@ -148,14 +191,19 @@ class TestJobsPaginationDefaults:
db.list_jobs.return_value = []
mock_get_db.return_value = db
client.get("/jobs")
client.get("/jobs", headers=_auth_header())
call_kwargs = db.list_jobs.call_args
# Endpoint requests limit+1 from DB, so 51
assert 51 in call_kwargs.args or call_kwargs.kwargs.get("limit") == 51
def test_limit_over_200_rejected(self, client):
"""Limit > 200 should be rejected with 422."""
response = client.get("/jobs?limit=201")
response = client.get("/jobs?limit=201", headers=_auth_header())
assert response.status_code == 422
def test_limit_zero_rejected(self, client):
"""Limit < 1 should be rejected with 422."""
response = client.get("/jobs?limit=0", headers=_auth_header())
assert response.status_code == 422
@patch("SPARC.api._get_job_db")
@@ -165,5 +213,109 @@ class TestJobsPaginationDefaults:
db.list_jobs.return_value = []
mock_get_db.return_value = db
response = client.get("/jobs?limit=200")
response = client.get("/jobs?limit=200", headers=_auth_header())
assert response.status_code == 200
@patch("SPARC.api._get_job_db")
def test_first_page_returns_items_and_cursor(self, mock_get_db, client):
"""First page with more results than limit should return next_cursor."""
db = Mock()
# Return limit+1 rows to simulate more data available
rows = [_make_job_row(f"job-{i}", minutes_ago=i) for i in range(4)]
db.list_jobs.return_value = rows
mock_get_db.return_value = db
response = client.get("/jobs?limit=3", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert len(data["items"]) == 3
assert data["next_cursor"] is not None
@patch("SPARC.api._get_job_db")
def test_last_page_returns_no_cursor(self, mock_get_db, client):
"""When fewer results than limit, next_cursor should be null (last page)."""
db = Mock()
rows = [
_make_job_row("job-a", minutes_ago=5),
_make_job_row("job-b", minutes_ago=10),
]
db.list_jobs.return_value = rows
mock_get_db.return_value = db
response = client.get("/jobs?limit=10", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert len(data["items"]) == 2
assert data["next_cursor"] is None
@patch("SPARC.api._get_job_db")
def test_cursor_forwarded_to_db(self, mock_get_db, client):
"""The cursor query param should be forwarded to the database layer."""
db = Mock()
db.list_jobs.return_value = []
mock_get_db.return_value = db
client.get("/jobs?cursor=2025-01-01T00:00:00|job-99", headers=_auth_header())
db.list_jobs.assert_called_once()
call_kwargs = db.list_jobs.call_args
cursor_val = (
call_kwargs.kwargs.get("cursor")
or (call_kwargs[1].get("cursor") if len(call_kwargs) > 1 else None)
)
assert cursor_val == "2025-01-01T00:00:00|job-99"
@patch("SPARC.api._get_job_db")
def test_empty_result_set(self, mock_get_db, client):
"""Empty result set returns empty items list and null next_cursor."""
db = Mock()
db.list_jobs.return_value = []
mock_get_db.return_value = db
response = client.get("/jobs", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert data["items"] == []
assert data["next_cursor"] is None
@patch("SPARC.api._get_job_db")
def test_status_filter_forwarded(self, mock_get_db, client):
"""The status filter should be forwarded to the database layer."""
db = Mock()
db.list_jobs.return_value = []
mock_get_db.return_value = db
client.get("/jobs?status=completed", headers=_auth_header())
db.list_jobs.assert_called_once()
call_kwargs = db.list_jobs.call_args
status_val = (
call_kwargs.kwargs.get("status")
or (call_kwargs[1].get("status") if len(call_kwargs) > 1 else None)
)
assert status_val == "completed"
@patch("SPARC.api._get_job_db")
def test_response_has_paginated_shape(self, mock_get_db, client):
"""Response must have 'items' and 'next_cursor' fields (paginated shape)."""
db = Mock()
db.list_jobs.return_value = [_make_job_row("job-x")]
mock_get_db.return_value = db
response = client.get("/jobs?limit=10", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "next_cursor" in data
@patch("SPARC.api._get_job_db")
def test_subsequent_page_uses_cursor(self, mock_get_db, client):
"""Passing cursor returns the next page; last page has null next_cursor."""
db = Mock()
db.list_jobs.return_value = [_make_job_row("job-last", minutes_ago=200)]
mock_get_db.return_value = db
cursor = "2025-06-01T12:00:00|job-50"
response = client.get(f"/jobs?limit=10&cursor={cursor}", headers=_auth_header())
assert response.status_code == 200
data = response.json()
assert len(data["items"]) == 1
assert data["next_cursor"] is None
+2 -71
View File
@@ -20,10 +20,8 @@ def client():
def reset_stats():
"""Reset rate limit stats between tests."""
api._rate_limit_stats.clear()
api._rejected_log.clear()
yield
api._rate_limit_stats.clear()
api._rejected_log.clear()
def _mock_admin():
@@ -52,7 +50,8 @@ class TestRateLimitAdminEndpoint:
app.dependency_overrides.clear()
def test_non_admin_rejected(self, client):
"""Non-admin users should get 401/403."""
"""Non-admin users should get 403."""
# Without overriding the dependency, it should fail auth
response = client.get("/admin/rate-limits")
assert response.status_code in (401, 403)
@@ -78,9 +77,6 @@ class TestRateLimitAdminEndpoint:
for rl in data["rate_limits"]:
assert rl["total_requests"] == 0
assert rl["rejected_requests"] == 0
assert rl["by_ip"] == []
assert data["throttled_24h"] == 0
assert data["throttled_over_time"] == []
finally:
app.dependency_overrides.clear()
@@ -111,68 +107,3 @@ class TestRateLimitAdminEndpoint:
assert isinstance(rl["limit"], str)
finally:
app.dependency_overrides.clear()
def test_per_ip_breakdown(self, client):
"""Stats should include per-IP breakdown with total and rejected counts."""
api._track_rate_limit_request("/auth/login", "10.0.0.1")
api._track_rate_limit_request("/auth/login", "10.0.0.1", rejected=True)
api._track_rate_limit_request("/auth/login", "10.0.0.2")
app.dependency_overrides[api.get_current_admin] = _mock_admin
try:
response = client.get("/admin/rate-limits")
data = response.json()
login_stats = next(rl for rl in data["rate_limits"] if rl["endpoint"] == "/auth/login")
by_ip = login_stats["by_ip"]
assert len(by_ip) == 2
ip1 = next(entry for entry in by_ip if entry["ip"] == "10.0.0.1")
assert ip1["total"] == 2
assert ip1["rejected"] == 1
ip2 = next(entry for entry in by_ip if entry["ip"] == "10.0.0.2")
assert ip2["total"] == 1
assert ip2["rejected"] == 0
finally:
app.dependency_overrides.clear()
def test_throttled_24h_count(self, client):
"""Should report total throttled requests in the last 24 hours."""
api._track_rate_limit_request("/auth/login", "10.0.0.1", rejected=True)
api._track_rate_limit_request("/auth/register", "10.0.0.2", rejected=True)
app.dependency_overrides[api.get_current_admin] = _mock_admin
try:
response = client.get("/admin/rate-limits")
data = response.json()
assert data["throttled_24h"] == 2
finally:
app.dependency_overrides.clear()
def test_throttled_over_time_structure(self, client):
"""Throttled-over-time should be a list of {timestamp, count} buckets."""
api._track_rate_limit_request("/auth/login", "10.0.0.1", rejected=True)
app.dependency_overrides[api.get_current_admin] = _mock_admin
try:
response = client.get("/admin/rate-limits")
data = response.json()
assert len(data["throttled_over_time"]) >= 1
entry = data["throttled_over_time"][0]
assert "timestamp" in entry
assert "count" in entry
assert entry["count"] >= 1
finally:
app.dependency_overrides.clear()
def test_response_shape_matches_contract(self, client):
"""The full response should match the expected shape for the frontend."""
app.dependency_overrides[api.get_current_admin] = _mock_admin
try:
response = client.get("/admin/rate-limits")
data = response.json()
# Top-level keys
assert set(data.keys()) == {"rate_limits", "throttled_24h", "throttled_over_time"}
# Each rate_limit entry
for rl in data["rate_limits"]:
assert set(rl.keys()) == {"endpoint", "limit", "total_requests", "rejected_requests", "by_ip"}
finally:
app.dependency_overrides.clear()