From 97048917f22f84dfde7861b04977bd0fafa5ed3a Mon Sep 17 00:00:00 2001 From: agent-company Date: Mon, 30 Mar 2026 16:08:02 +0000 Subject: [PATCH] docs: document patent PDF volume mount for containerized deployments Switch docker-compose.yml from bind mount to a named volume (patent_data) so downloaded PDFs survive container recreation. Add a "Patent PDF Storage" section to DEPLOYMENT.md covering Docker Compose, Kubernetes PVC, and S3 alternatives. Closes leeworks-agents/SPARC#1360 Co-Authored-By: Claude Opus 4.6 (1M context) --- docker-compose.yml | 3 +- docs/DEPLOYMENT.md | 77 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 95cc313..14842b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,7 +49,7 @@ services: init-db: condition: service_completed_successfully volumes: - - ./patents:/app/patents + - patent_data:/app/patents restart: unless-stopped # Optional: MinIO for S3-compatible local object storage @@ -86,4 +86,5 @@ services: volumes: postgres_data: + patent_data: minio_data: diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index bb7bfd9..c89d883 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -276,7 +276,7 @@ The `docker-compose.yml` includes all services needed for production: |---------|-----------|------|-------------| | `postgres` | sparc-postgres | 5432 | PostgreSQL database | | `init-db` | sparc-init-db | - | One-time database initialization (seeds admin user) | -| `api` | sparc-api | 8000 | FastAPI REST API with JWT auth | +| `api` | sparc-api | 8000 | FastAPI REST API with JWT auth (patent PDFs stored in `patent_data` volume) | | `dashboard` | sparc-dashboard | 8080 | React TypeScript web UI | ### Common Docker Compose Commands @@ -307,6 +307,81 @@ docker-compose restart api --- +## Patent PDF Storage + +The SPARC API downloads patent PDFs during analysis and stores them at `/app/patents` inside the container. 
These files are used for subsequent single-patent analysis requests and as a local cache to avoid re-downloading. If this directory is not persisted, all downloaded PDFs are lost when the container is recreated. + +### Docker Compose (default) + +The default `docker-compose.yml` declares a named volume called `patent_data` that is mounted at `/app/patents`: + +```yaml +# In the api service: +volumes: + - patent_data:/app/patents + +# At the top-level volumes section: +volumes: + patent_data: +``` + +This means PDFs survive `docker compose down` and `docker compose up` cycles. To remove patent data intentionally, run: + +```bash +docker compose down -v # WARNING: removes ALL named volumes, including postgres_data and minio_data +# or selectively (volume names are prefixed with the Compose project name; check with 'docker volume ls'): +docker volume rm sparc_patent_data +``` + +If you prefer a bind mount (e.g., for easy host-side access during development, or to keep using PDFs already downloaded to `./patents`, which are not copied into the named volume automatically), replace the volume with: + +```yaml +volumes: + - ./patents:/app/patents +``` + +### Kubernetes + +For Kubernetes deployments, create a PersistentVolumeClaim and mount it into the API pod (the Deployment below is an excerpt showing only the volume-related fields): + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sparc-patent-data +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sparc-api +spec: + template: + spec: + containers: + - name: api + volumeMounts: + - name: patent-data + mountPath: /app/patents + volumes: + - name: patent-data + persistentVolumeClaim: + claimName: sparc-patent-data +``` + +Adjust the storage size based on expected patent volume. Each patent PDF is typically 1-5 MB. Because the claim is `ReadWriteOnce`, it can be mounted read-write by a single node only; to run API replicas across multiple nodes, use a `ReadWriteMany`-capable storage class or the S3 backend described below. + +### S3 Object Storage (alternative) + +For production deployments that need shared or highly durable storage, set `STORAGE_BACKEND=s3` in your `.env` file. This stores patent PDFs in an S3-compatible bucket (AWS S3 or MinIO) instead of the local filesystem, eliminating the need for a persistent volume. See the S3/MinIO section in `.env.example` for configuration details. 
+ +--- + ## Troubleshooting ### Database Connection Issues