Compare commits

..

1 Commits

Author SHA1 Message Date
agent-company 3dac88ec90 docs: document patent PDF storage, add FileNotFoundError, commit lockfile
- Add docstring to analyze_single_patent explaining the PDF prerequisite
- Raise FileNotFoundError with helpful message when PDF is missing
- Add patent PDF storage section to README with Docker volume mount example
- Commit frontend/package-lock.json for reproducible builds

Closes leeworks-agents/SPARC#15
Closes leeworks-agents/SPARC#17

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 04:17:09 +00:00
9 changed files with 4779 additions and 190 deletions
+9 -30
View File
@@ -1,42 +1,21 @@
# SPARC Configuration # SPARC Configuration
# ---- Application Environment ----
# Set to "production" or "staging" in deployed environments.
# The API will refuse to start with the default JWT secret unless APP_ENV=development.
APP_ENV=development
# ---- API Keys ----
# SerpAPI key for patent search # SerpAPI key for patent search
API_KEY=your_serpapi_key_here API_KEY=your_serpapi_key_here
# OpenRouter API key for LLM analysis # OpenRouter API key for LLM analysis
OPENROUTER_API_KEY=your_openrouter_key_here OPENROUTER_API_KEY=your_openrouter_key_here
# ---- Database ---- # Database configuration
# All messages are stored in the database for persistence and caching
# PostgreSQL credentials (used by docker-compose) DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
POSTGRES_USER=postgres
POSTGRES_PASSWORD=change-me-to-a-secure-password
POSTGRES_DB=sparc
# Full database URL (must match the credentials above)
DATABASE_URL=postgresql://postgres:change-me-to-a-secure-password@localhost:5432/sparc
# ---- Authentication ----
# JWT Secret for signing tokens
# IMPORTANT: Change this to a secure random string in production
JWT_SECRET=your-secure-jwt-secret-change-in-production
# ---- CORS ----
# Comma-separated list of allowed origins for CORS
# Defaults to http://localhost:3000,http://localhost:5173 when unset
# CORS_ORIGINS=https://sparc.example.com,https://app.example.com
# ---- Cache ----
# Cache configuration
# When USE_CACHE=true: check database for cached responses before making API calls # When USE_CACHE=true: check database for cached responses before making API calls
# When USE_CACHE=false: always make fresh API calls (still stores results in database) # When USE_CACHE=false: always make fresh API calls (still stores results in database)
# Default: true
USE_CACHE=true USE_CACHE=true
# JWT Secret for authentication
# IMPORTANT: Change this to a secure random string in production
JWT_SECRET=your-secure-jwt-secret-change-in-production
+15
View File
@@ -54,6 +54,21 @@ docker-compose up -d
# - API Docs: http://localhost:8000/docs # - API Docs: http://localhost:8000/docs
``` ```
#### Patent PDF Storage
The API stores downloaded patent PDFs in a `patents/` directory. In Docker,
this is mounted as a bind mount (`./patents:/app/patents`) so that PDFs persist
across container restarts.
If you deploy to a different environment, ensure the `patents/` directory is backed by a
persistent volume. Without one, downloaded PDFs do not survive container restarts and have to be downloaded again.
```yaml
# docker-compose.yml excerpt
volumes:
- ./patents:/app/patents
```
### NixOS ### NixOS
```bash ```bash
+19 -5
View File
@@ -104,21 +104,33 @@ class CompanyAnalyzer:
def analyze_single_patent(self, patent_id: str, company_name: str) -> str: def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
"""Analyze a single patent by ID. """Analyze a single patent by ID.
Useful for focused analysis of specific innovations. Prerequisite:
The patent PDF must already exist at ``patents/{patent_id}.pdf``
before calling this method. PDFs are downloaded automatically when
using the batch analysis pipeline (``analyze_company`` or the
``/analyze/batch`` API endpoint). For standalone usage, download
the PDF manually or call ``SERP.save_patents()`` first.
Args: Args:
patent_id: Publication ID of the patent patent_id: Publication ID of the patent (e.g. "US-11234567-B2")
company_name: Name of the company (for context) company_name: Name of the company (for context)
Returns: Returns:
Analysis of the specific patent's innovation quality Analysis of the specific patent's innovation quality
Raises:
FileNotFoundError: If the patent PDF is not found at the expected path.
""" """
# Note: This simplified version assumes the patent PDF is already downloaded import os
# A more complete implementation would support direct patent ID lookup
print(f"Analyzing patent {patent_id} for {company_name}...")
patent_path = f"patents/{patent_id}.pdf" patent_path = f"patents/{patent_id}.pdf"
if not os.path.exists(patent_path):
raise FileNotFoundError(
f"Patent PDF not found at '{patent_path}'. "
f"Download the PDF first using SERP.save_patents() or the batch analysis pipeline."
)
try: try:
sections = SERP.parse_patent_pdf(patent_path) sections = SERP.parse_patent_pdf(patent_path)
minimized_content = SERP.minimize_patent_for_llm(sections) minimized_content = SERP.minimize_patent_for_llm(sections)
@@ -129,6 +141,8 @@ class CompanyAnalyzer:
return analysis return analysis
except FileNotFoundError:
raise
except Exception as e: except Exception as e:
return f"Failed to analyze patent {patent_id}: {e}" return f"Failed to analyze patent {patent_id}: {e}"
+1 -3
View File
@@ -16,7 +16,6 @@ from SPARC.analyzer import CompanyAnalyzer
from SPARC.auth import ( from SPARC.auth import (
TokenResponse, TokenResponse,
UserResponse, UserResponse,
check_jwt_secret,
create_tokens, create_tokens,
decode_token, decode_token,
get_current_admin, get_current_admin,
@@ -151,7 +150,6 @@ _analyzer: CompanyAnalyzer | None = None
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""Initialize resources on startup.""" """Initialize resources on startup."""
global _analyzer global _analyzer
check_jwt_secret()
_analyzer = CompanyAnalyzer() _analyzer = CompanyAnalyzer()
yield yield
# Cleanup if needed # Cleanup if needed
@@ -169,7 +167,7 @@ app = FastAPI(
# Add CORS middleware for React frontend # Add CORS middleware for React frontend
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=config.cors_origins, allow_origins=["http://localhost:3000", "http://localhost:5173"],
allow_credentials=True, allow_credentials=True,
allow_methods=["*"], allow_methods=["*"],
allow_headers=["*"], allow_headers=["*"],
+1 -15
View File
@@ -13,25 +13,11 @@ from SPARC import config
from SPARC.database import DatabaseClient from SPARC.database import DatabaseClient
# JWT Configuration # JWT Configuration
_DEFAULT_JWT_SECRET = "sparc-secret-key-change-in-production" JWT_SECRET = os.getenv("JWT_SECRET", "sparc-secret-key-change-in-production")
JWT_SECRET = os.getenv("JWT_SECRET", _DEFAULT_JWT_SECRET)
JWT_ALGORITHM = "HS256" JWT_ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30 ACCESS_TOKEN_EXPIRE_MINUTES = 30
REFRESH_TOKEN_EXPIRE_DAYS = 7 REFRESH_TOKEN_EXPIRE_DAYS = 7
def check_jwt_secret() -> None:
    """Abort startup when the built-in JWT secret is used outside development.

    The check passes when ``JWT_SECRET`` differs from ``_DEFAULT_JWT_SECRET``
    or when ``config.app_env`` equals ``"development"``.

    Raises:
        RuntimeError: If JWT_SECRET is still the default value and
            ``config.app_env`` is anything other than ``"development"``.
    """
    # A custom secret is acceptable in every environment.
    if JWT_SECRET != _DEFAULT_JWT_SECRET:
        return

    # The default secret is tolerated for local development only.
    if config.app_env == "development":
        return

    raise RuntimeError(
        f"FATAL: JWT_SECRET is set to the default value and APP_ENV={config.app_env!r}. "
        "Set a secure JWT_SECRET environment variable before running in non-development environments."
    )
security = HTTPBearer() security = HTTPBearer()
-13
View File
@@ -33,16 +33,3 @@ patent_thread_workers = int(os.getenv("PATENT_THREAD_WORKERS", "5"))
# Root path for running behind a reverse proxy (e.g., "/api" when served at /api/) # Root path for running behind a reverse proxy (e.g., "/api" when served at /api/)
# This ensures OpenAPI docs work correctly when accessed via the proxy # This ensures OpenAPI docs work correctly when accessed via the proxy
root_path = os.getenv("ROOT_PATH", "") root_path = os.getenv("ROOT_PATH", "")
# Application environment: "development", "staging", or "production"
# Used for safety checks (e.g., refusing default JWT secret in production)
app_env = os.getenv("APP_ENV", "development")

# CORS allowed origins (comma-separated)
# Each entry is stripped of surrounding whitespace and blank entries are dropped.
# Falls back to the localhost dev origins when CORS_ORIGINS is unset, empty, or
# contains nothing but separators/whitespace (e.g. " , "), so a stray comma
# cannot silently produce an empty allow-list.
_cors_origins_raw = os.getenv("CORS_ORIGINS", "")
cors_origins: list[str] = [
    origin.strip() for origin in _cors_origins_raw.split(",") if origin.strip()
] or ["http://localhost:3000", "http://localhost:5173"]
+6 -8
View File
@@ -3,15 +3,15 @@ services:
image: postgres:16-alpine image: postgres:16-alpine
container_name: sparc-postgres container_name: sparc-postgres
environment: environment:
POSTGRES_USER: ${POSTGRES_USER} POSTGRES_USER: postgres
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_PASSWORD: postgres
POSTGRES_DB: ${POSTGRES_DB} POSTGRES_DB: sparc
ports: ports:
- "5432:5432" - "5432:5432"
volumes: volumes:
- postgres_data:/var/lib/postgresql/data - postgres_data:/var/lib/postgresql/data
healthcheck: healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"] test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s interval: 5s
timeout: 5s timeout: 5s
retries: 5 retries: 5
@@ -22,7 +22,7 @@ services:
container_name: sparc-init-db container_name: sparc-init-db
command: python scripts/init_database.py command: python scripts/init_database.py
environment: environment:
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB} DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
depends_on: depends_on:
postgres: postgres:
condition: service_healthy condition: service_healthy
@@ -35,11 +35,9 @@ services:
environment: environment:
API_KEY: ${API_KEY} API_KEY: ${API_KEY}
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB} DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
USE_CACHE: "true" USE_CACHE: "true"
JWT_SECRET: ${JWT_SECRET:-sparc-secret-key-change-in-production} JWT_SECRET: ${JWT_SECRET:-sparc-secret-key-change-in-production}
CORS_ORIGINS: ${CORS_ORIGINS:-}
APP_ENV: ${APP_ENV:-development}
ROOT_PATH: /api ROOT_PATH: /api
ports: ports:
- "8000:8000" - "8000:8000"
+4728
View File
File diff suppressed because it is too large Load Diff
-116
View File
@@ -1,116 +0,0 @@
"""Tests for security hardening: JWT secret startup check, CORS config, credential handling."""
import os
from unittest.mock import patch
import pytest
class TestJWTSecretStartupCheck:
    """Test the startup guard that refuses default JWT secret in non-dev environments."""

    @staticmethod
    def _run_check(app_env, secret=None):
        """Reload ``SPARC.config`` under a given APP_ENV and call ``check_jwt_secret()``.

        Args:
            app_env: Value to export as APP_ENV, or ``None`` to remove APP_ENV
                from the environment entirely.
            secret: Value patched into ``SPARC.auth.JWT_SECRET``; ``None`` means
                the built-in default secret.

        Raises:
            RuntimeError: Propagated from ``check_jwt_secret()``.
        """
        import importlib

        import SPARC.config

        env = os.environ.copy()
        env.pop("APP_ENV", None)
        if app_env is not None:
            env["APP_ENV"] = app_env

        try:
            with patch.dict(os.environ, env, clear=True):
                # Reload config so it picks up the patched APP_ENV.
                importlib.reload(SPARC.config)
                from SPARC.auth import _DEFAULT_JWT_SECRET, check_jwt_secret

                if secret is None:
                    secret = _DEFAULT_JWT_SECRET
                with patch("SPARC.auth.JWT_SECRET", secret):
                    check_jwt_secret()
        finally:
            # Always put config back into development mode, even when the
            # check raises or fails, so later tests are not polluted.
            with patch.dict(os.environ, {"APP_ENV": "development"}):
                importlib.reload(SPARC.config)

    def test_default_secret_in_production_raises(self):
        """Starting with default secret and APP_ENV=production must raise RuntimeError."""
        with pytest.raises(RuntimeError, match="FATAL.*JWT_SECRET"):
            self._run_check("production")

    def test_default_secret_in_development_succeeds(self):
        """Starting with default secret and APP_ENV=development must not raise."""
        self._run_check("development")

    def test_custom_secret_in_production_succeeds(self):
        """Starting with a custom secret in production must not raise."""
        self._run_check("production", secret="my-secure-random-secret-abc123")

    def test_default_secret_unset_env_succeeds(self):
        """When APP_ENV is unset (defaults to development), default secret is allowed."""
        self._run_check(None)
class TestCORSConfig:
    """Test that CORS origins are configurable via environment variable."""

    @staticmethod
    def _origins_for(raw):
        """Return ``SPARC.config.cors_origins`` as loaded with CORS_ORIGINS=``raw``.

        Args:
            raw: Value to export as the CORS_ORIGINS environment variable.

        Returns:
            A copy of the origin list computed by the reloaded config module.
        """
        import importlib

        import SPARC.config

        try:
            with patch.dict(os.environ, {"CORS_ORIGINS": raw}):
                # Reload config so it picks up the patched CORS_ORIGINS.
                importlib.reload(SPARC.config)
                return list(SPARC.config.cors_origins)
        finally:
            # Restore the default origins even if the reload above raised,
            # so a failure here cannot leak custom origins into other tests.
            with patch.dict(os.environ, {"CORS_ORIGINS": ""}):
                importlib.reload(SPARC.config)

    def test_default_cors_origins(self):
        """When CORS_ORIGINS is unset, defaults to localhost origins."""
        assert self._origins_for("") == [
            "http://localhost:3000",
            "http://localhost:5173",
        ]

    def test_custom_cors_origins(self):
        """Setting CORS_ORIGINS configures allowed origins."""
        raw = "https://sparc.example.com,https://app.example.com"
        assert self._origins_for(raw) == [
            "https://sparc.example.com",
            "https://app.example.com",
        ]

    def test_single_cors_origin(self):
        """A single origin without comma works correctly."""
        assert self._origins_for("https://sparc.example.com") == [
            "https://sparc.example.com"
        ]