Add POST /export/batch endpoint for multi-company ZIP download

Implements issue #1674: a new authenticated POST /export/batch endpoint that accepts a list of company names and an optional format (csv or pdf), compiles per-company exports into a ZIP archive using Python's zipfile module, and returns it as a streaming download. Key changes: - Extract `_fetch_company_rows`, `_build_company_csv`, `_build_company_pdf` helpers to eliminate duplication between the single-company endpoints and the new batch endpoint - Refactor `export_company_csv` and `export_company_pdf` to delegate to the new helpers - Add `BatchExportRequest` Pydantic model (companies list + format field) - Add `POST /export/batch` which iterates over companies, skips those with no data, writes per-company files into the ZIP, and always includes a `manifest.json` listing exported and skipped companies - Response header: `Content-Disposition: attachment; filename=sparc-export-<date>.zip` - 17 new tests covering: single company (CSV + PDF), multiple companies, all-missing, unauthenticated, invalid-token, manifest structure, input validation Closes leeworks-agents/SPARC#1674 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 15:21:09 +00:00
3 changed files with 532 additions and 125 deletions
@@ -36,28 +36,16 @@ from SPARC.auth import (
 )
 from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult

-# Validated company name type: 2-128 chars, alphanumeric + spaces/hyphens/ampersands/periods only.
+# Validated company name type: 2-100 chars, alphanumeric + spaces/hyphens/ampersands/periods only.
 CompanyName = Annotated[
    str,
    StringConstraints(
        min_length=2,
-        max_length=128,
+        max_length=100,
        pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$",
    ),
 ]

-# Reusable Query constraint for optional company_name filter parameters.
-_COMPANY_NAME_FILTER_QUERY = Query(
-    default=None,
-    min_length=2,
-    max_length=128,
-    pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$",
-    description=(
-        "Company name filter (2-128 chars; alphanumeric, spaces, hyphens, "
-        "periods, and ampersands only)"
-    ),
-)
-

 # Pydantic models for API
 class CompanyAnalysisResponse(BaseModel):
@@ -501,7 +489,7 @@ async def add_tracked_company(

@app.delete("/admin/tracked/{company_name}", tags=["Admin"])
 async def remove_tracked_company(
-    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    _: UserResponse = Depends(get_current_admin),
 ):
    """Remove a company from the tracked list (admin only)."""
@@ -687,27 +675,25 @@ async def get_analytics_trends(
 # ============== Export Endpoints ==============


-@app.get("/export/{company_name}", tags=["Export"])
-async def export_company_csv(
-    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
-    _: UserResponse = Depends(get_current_user),
-):
-    """Export analysis results for a company as a CSV file.
+class BatchExportRequest(BaseModel):
+    """Request model for batch ZIP export of analysis results."""

-    Returns all stored analysis records for the given company, including
-    analysis type, model used, response text, and timestamp.
+    companies: list[CompanyName] = Field(
+        ..., min_length=1, max_length=50, description="List of company names to export"
+    )
+    format: str = Field(
+        default="csv",
+        pattern="^(csv|pdf)$",
+        description="Export format: 'csv' or 'pdf'",
+    )

-    Args:
-        company_name: Company name to export results for

-    Returns:
-        CSV file download
+def _fetch_company_rows(db, company_name: str) -> list:
+    """Fetch all non-cached analysis rows for *company_name* from the DB.
+
+    Returns a list of tuples: (company_name, analysis_type, model, response, timestamp).
+    Returns an empty list when no results exist.
    """
-    import csv
-    import io
-
-    db = get_db_client()
-    # Query all non-cached analysis results for this company
    with db.get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(
@@ -719,43 +705,24 @@ async def export_company_csv(
                """,
                (company_name,),
            )
-            rows = cur.fetchall()
+            return cur.fetchall()

-    if not rows:
-        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
+
+def _build_company_csv(rows) -> bytes:
+    """Render *rows* as CSV bytes."""
+    import csv
+    import io

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["company_name", "analysis_type", "model", "analysis", "timestamp"])
    for row in rows:
        writer.writerow(row)
-
-    output.seek(0)
-    safe_name = company_name.replace(" ", "_").lower()
-    return StreamingResponse(
-        iter([output.getvalue()]),
-        media_type="text/csv",
-        headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'},
-    )
+    return output.getvalue().encode("utf-8")


-@app.get("/export/{company_name}/pdf", tags=["Export"])
-async def export_company_pdf(
-    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
-    _: UserResponse = Depends(get_current_user),
-):
-    """Export analysis results for a company as a formatted PDF report.
-
-    Returns all stored analysis records for the given company, including
-    analysis type, model used, response text, and timestamp, formatted
-    as a downloadable PDF document.
-
-    Args:
-        company_name: Company name to export results for
-
-    Returns:
-        PDF file download
-    """
+def _build_company_pdf(rows, company_name: str) -> bytes:
+    """Render *rows* as PDF bytes using reportlab."""
    import io

    from reportlab.lib import colors
@@ -770,23 +737,6 @@ async def export_company_pdf(
        TableStyle,
    )

-    db = get_db_client()
-    with db.get_conn() as conn:
-        with conn.cursor() as cur:
-            cur.execute(
-                """
-                SELECT company_name, analysis_type, model, response, timestamp
-                FROM llm_messages
-                WHERE LOWER(company_name) = LOWER(%s) AND is_cached = FALSE
-                ORDER BY timestamp DESC
-                """,
-                (company_name,),
-            )
-            rows = cur.fetchall()
-
-    if not rows:
-        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
-
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
@@ -829,13 +779,11 @@ async def export_company_pdf(

    elements = []

-    # Title and date
-    display_name = rows[0][0]  # Use the casing from the database
+    display_name = rows[0][0]
    analysis_date = datetime.now().strftime("%Y-%m-%d")
    elements.append(Paragraph(f"SPARC Analysis Report: {display_name}", title_style))
    elements.append(Paragraph(f"Generated on {analysis_date}", subtitle_style))

-    # Summary table
    summary_data = [
        ["Total Analyses", str(len(rows))],
        ["Analysis Types", ", ".join(sorted(set(r[1] for r in rows)))],
@@ -857,7 +805,6 @@ async def export_company_pdf(
    elements.append(summary_table)
    elements.append(Spacer(1, 16))

-    # Individual analysis sections
    for i, row in enumerate(rows, 1):
        _, analysis_type, model, response, timestamp = row
        ts_str = timestamp.strftime("%Y-%m-%d %H:%M:%S") if hasattr(timestamp, "strftime") else str(timestamp)
@@ -869,13 +816,11 @@ async def export_company_pdf(
            Paragraph(f"<i>Performed: {ts_str}</i>", body_style)
        )

-        # Wrap long response text into paragraphs, escaping XML special chars
        safe_response = (
            response.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )
-        # Split into manageable paragraphs to avoid overflow
        for line in safe_response.split("\n"):
            if line.strip():
                elements.append(Paragraph(line, body_style))
@@ -886,11 +831,133 @@ async def export_company_pdf(

    doc.build(elements)
    buffer.seek(0)
+    return buffer.getvalue()
+
+
+@app.post("/export/batch", tags=["Export"])
+async def export_batch_zip(
+    request: BatchExportRequest,
+    _: UserResponse = Depends(get_current_user),
+):
+    """Export analysis results for multiple companies as a ZIP archive.
+
+    For each company in the request, fetches all stored analysis records and
+    adds a per-company file (CSV or PDF) to the archive. Companies with no
+    stored results are skipped; a ``manifest.json`` inside the ZIP lists both
+    the exported and skipped companies.
+
+    Args:
+        request: List of company names and desired export format ('csv' or 'pdf')
+
+    Returns:
+        ZIP archive download containing one file per found company plus a manifest
+    """
+    import io
+    import json
+    import zipfile
+
+    db = get_db_client()
+    export_date = datetime.now().strftime("%Y-%m-%d")
+    fmt = request.format
+
+    exported: list[str] = []
+    skipped: list[str] = []
+
+    zip_buffer = io.BytesIO()
+    with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
+        for company_name in request.companies:
+            rows = _fetch_company_rows(db, company_name)
+            if not rows:
+                skipped.append(company_name)
+                continue
+
+            safe_name = company_name.replace(" ", "_").lower()
+            if fmt == "pdf":
+                file_bytes = _build_company_pdf(rows, company_name)
+                filename = f"{safe_name}-analysis-{export_date}.pdf"
+            else:
+                file_bytes = _build_company_csv(rows)
+                filename = f"sparc_{safe_name}_export.csv"
+
+            zf.writestr(filename, file_bytes)
+            exported.append(company_name)
+
+        # Always include a manifest
+        manifest = {
+            "export_date": export_date,
+            "format": fmt,
+            "exported": exported,
+            "skipped": skipped,
+        }
+        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
+
+    zip_buffer.seek(0)
+    zip_filename = f"sparc-export-{export_date}.zip"
+    return StreamingResponse(
+        iter([zip_buffer.getvalue()]),
+        media_type="application/zip",
+        headers={"Content-Disposition": f'attachment; filename="{zip_filename}"'},
+    )
+
+
+@app.get("/export/{company_name}", tags=["Export"])
+async def export_company_csv(
+    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    _: UserResponse = Depends(get_current_user),
+):
+    """Export analysis results for a company as a CSV file.
+
+    Returns all stored analysis records for the given company, including
+    analysis type, model used, response text, and timestamp.
+
+    Args:
+        company_name: Company name to export results for
+
+    Returns:
+        CSV file download
+    """
+    db = get_db_client()
+    rows = _fetch_company_rows(db, company_name)
+
+    if not rows:
+        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")

    safe_name = company_name.replace(" ", "_").lower()
+    return StreamingResponse(
+        iter([_build_company_csv(rows)]),
+        media_type="text/csv",
+        headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'},
+    )
+
+
+@app.get("/export/{company_name}/pdf", tags=["Export"])
+async def export_company_pdf(
+    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    _: UserResponse = Depends(get_current_user),
+):
+    """Export analysis results for a company as a formatted PDF report.
+
+    Returns all stored analysis records for the given company, including
+    analysis type, model used, response text, and timestamp, formatted
+    as a downloadable PDF document.
+
+    Args:
+        company_name: Company name to export results for
+
+    Returns:
+        PDF file download
+    """
+    db = get_db_client()
+    rows = _fetch_company_rows(db, company_name)
+
+    if not rows:
+        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
+
+    safe_name = company_name.replace(" ", "_").lower()
+    analysis_date = datetime.now().strftime("%Y-%m-%d")
    filename = f"{safe_name}-analysis-{analysis_date}.pdf"
    return StreamingResponse(
-        iter([buffer.getvalue()]),
+        iter([_build_company_pdf(rows, company_name)]),
        media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
@@ -915,7 +982,7 @@ async def health_check():
    tags=["Analysis"],
 )
 async def analyze_company(
-    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    model: str | None = Query(default=None, description="LLM model to use (e.g. 'openai/gpt-4o'). Defaults to server config."),
    _: UserResponse = Depends(get_current_user),
 ):
@@ -945,7 +1012,7 @@ async def analyze_company(
 )
 async def analyze_single_patent(
    patent_id: str,
-    company_name: Annotated[str, Query(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$", description="Company name for analysis context")],
+    company_name: Annotated[str, Query(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$", description="Company name for analysis context")],
    _: UserResponse = Depends(get_current_user),
 ):
    """Analyze a single patent by its publication ID.
@@ -979,7 +1046,7 @@ async def analyze_single_patent(
 async def list_analysis_results(
    company_name: Annotated[
        str | None,
-        _COMPANY_NAME_FILTER_QUERY,
+        Query(description="Filter results by company name"),
    ] = None,
    limit: Annotated[int, Query(ge=1, le=200)] = 50,
    cursor: Annotated[
@@ -0,0 +1,373 @@
+"""Tests for POST /export/batch endpoint (issue #1674).
+
+Covers:
+- Single company export (CSV + PDF)
+- Multiple company export
+- All-missing companies (every requested company is skipped)
+- Unauthenticated / invalid-token requests
+- Manifest content validation
+- Invalid format rejection
+"""
+
+import io
+import json
+import zipfile
+from datetime import datetime, timezone
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from SPARC.api import app
+from SPARC.auth import create_access_token
+
+
+@pytest.fixture
+def client():
+    """Create a FastAPI test client."""
+    return TestClient(app)
+
+
+@pytest.fixture(autouse=True)
+def mock_db():
+    """Mock database client for all tests in this module."""
+    db = MagicMock()
+
+    # Auth: user always exists
+    db.get_user_by_id.return_value = {
+        "id": 1,
+        "email": "user@test.com",
+        "role": "user",
+        "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
+    }
+
+    # Default cursor mock (overridden per-test via side_effect or return_value)
+    mock_cursor = MagicMock()
+    mock_conn = MagicMock()
+    mock_conn.cursor.return_value.__enter__ = MagicMock(return_value=mock_cursor)
+    mock_conn.cursor.return_value.__exit__ = MagicMock(return_value=False)
+    db.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn)
+    db.get_conn.return_value.__exit__ = MagicMock(return_value=False)
+    db._mock_cursor = mock_cursor
+
+    with patch("SPARC.api.get_db_client", return_value=db), \
+         patch("SPARC.auth.get_db_client", return_value=db):
+        yield db
+
+
+def _auth_header():
+    token = create_access_token(1, "user@test.com", "user")
+    return {"Authorization": f"Bearer {token}"}
+
+
+def _rows_for(company_name: str):
+    """Return a single sample row for the given company."""
+    return [
+        (
+            company_name,
+            "company_analysis",
+            "anthropic/claude-3.5-sonnet",
+            f"Strong patent portfolio for {company_name}.",
+            datetime(2025, 6, 15, 10, 30, 0),
+        )
+    ]
+
+
+def _open_zip(content: bytes) -> zipfile.ZipFile:
+    """Helper: wrap response bytes as a ZipFile."""
+    return zipfile.ZipFile(io.BytesIO(content))
+
+
+# ---------------------------------------------------------------------------
+# Authentication
+# ---------------------------------------------------------------------------
+
+
+class TestBatchExportAuth:
+    """Unauthenticated and invalid-token requests must be rejected."""
+
+    def test_unauthenticated_returns_401(self, client):
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+        )
+        assert response.status_code == 401
+
+    def test_invalid_token_returns_401(self, client):
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+            headers={"Authorization": "Bearer totally.invalid.token"},
+        )
+        assert response.status_code == 401
+
+
+# ---------------------------------------------------------------------------
+# Single company
+# ---------------------------------------------------------------------------
+
+
+class TestBatchExportSingleCompany:
+    """POST /export/batch with a single company name."""
+
+    def test_single_company_csv_returns_zip(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        assert response.status_code == 200
+        assert response.headers["content-type"] == "application/zip"
+        assert "attachment" in response.headers["content-disposition"]
+        assert "sparc-export-" in response.headers["content-disposition"]
+        assert response.headers["content-disposition"].endswith('.zip"')
+
+    def test_single_company_csv_zip_contains_csv_file(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        names = zf.namelist()
+        csv_files = [n for n in names if n.endswith(".csv")]
+        assert len(csv_files) == 1
+        assert "nvidia" in csv_files[0]
+
+    def test_single_company_csv_content_is_valid_csv(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        csv_name = [n for n in zf.namelist() if n.endswith(".csv")][0]
+        csv_text = zf.read(csv_name).decode("utf-8")
+        lines = csv_text.strip().split("\n")
+        assert lines[0].strip() == "company_name,analysis_type,model,analysis,timestamp"
+        assert "NVIDIA" in lines[1]
+
+    def test_single_company_pdf_zip_contains_pdf_file(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "pdf"},
+            headers=_auth_header(),
+        )
+
+        assert response.status_code == 200
+        zf = _open_zip(response.content)
+        pdf_files = [n for n in zf.namelist() if n.endswith(".pdf")]
+        assert len(pdf_files) == 1
+        # Verify it is actually a PDF (starts with %PDF)
+        pdf_bytes = zf.read(pdf_files[0])
+        assert pdf_bytes[:4] == b"%PDF"
+
+
+# ---------------------------------------------------------------------------
+# Multiple companies
+# ---------------------------------------------------------------------------
+
+
+class TestBatchExportMultipleCompanies:
+    """POST /export/batch with several companies."""
+
+    def test_multiple_companies_each_gets_a_file(self, client, mock_db):
+        companies = ["NVIDIA", "Intel", "AMD"]
+        mock_db._mock_cursor.fetchall.side_effect = [
+            _rows_for("NVIDIA"),
+            _rows_for("Intel"),
+            _rows_for("AMD"),
+        ]
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": companies, "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        assert response.status_code == 200
+        zf = _open_zip(response.content)
+        csv_files = [n for n in zf.namelist() if n.endswith(".csv")]
+        assert len(csv_files) == 3
+
+    def test_multiple_companies_manifest_lists_all_exported(self, client, mock_db):
+        companies = ["NVIDIA", "Intel"]
+        mock_db._mock_cursor.fetchall.side_effect = [
+            _rows_for("NVIDIA"),
+            _rows_for("Intel"),
+        ]
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": companies, "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        manifest = json.loads(zf.read("manifest.json"))
+        assert set(manifest["exported"]) == {"NVIDIA", "Intel"}
+        assert manifest["skipped"] == []
+        assert manifest["format"] == "csv"
+
+    def test_partial_missing_companies_skipped(self, client, mock_db):
+        """Companies with no data are skipped; others are exported."""
+        mock_db._mock_cursor.fetchall.side_effect = [
+            _rows_for("NVIDIA"),
+            [],  # no data for "UnknownCo"
+        ]
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA", "UnknownCo"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        assert response.status_code == 200
+        zf = _open_zip(response.content)
+        manifest = json.loads(zf.read("manifest.json"))
+        assert manifest["exported"] == ["NVIDIA"]
+        assert manifest["skipped"] == ["UnknownCo"]
+
+        csv_files = [n for n in zf.namelist() if n.endswith(".csv")]
+        assert len(csv_files) == 1
+
+
+# ---------------------------------------------------------------------------
+# All-missing companies
+# ---------------------------------------------------------------------------
+
+
+class TestBatchExportAllMissing:
+    """When every requested company has no data, the ZIP still returns 200
+    with only a manifest (no per-company files, all listed in skipped)."""
+
+    def test_all_missing_returns_200_with_manifest_only(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = []
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["GhostCo", "PhantomInc"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        assert response.status_code == 200
+        zf = _open_zip(response.content)
+        assert "manifest.json" in zf.namelist()
+
+        manifest = json.loads(zf.read("manifest.json"))
+        assert manifest["exported"] == []
+        assert set(manifest["skipped"]) == {"GhostCo", "PhantomInc"}
+
+    def test_all_missing_zip_has_no_data_files(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = []
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["GhostCo"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        data_files = [n for n in zf.namelist() if n != "manifest.json"]
+        assert data_files == []
+
+
+# ---------------------------------------------------------------------------
+# Manifest validation
+# ---------------------------------------------------------------------------
+
+
+class TestBatchExportManifest:
+    """The manifest.json inside every ZIP must be well-formed."""
+
+    def test_manifest_always_present(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        assert "manifest.json" in zf.namelist()
+
+    def test_manifest_contains_required_keys(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "csv"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        manifest = json.loads(zf.read("manifest.json"))
+        assert "export_date" in manifest
+        assert "format" in manifest
+        assert "exported" in manifest
+        assert "skipped" in manifest
+
+    def test_manifest_format_field_matches_request(self, client, mock_db):
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "pdf"},
+            headers=_auth_header(),
+        )
+
+        zf = _open_zip(response.content)
+        manifest = json.loads(zf.read("manifest.json"))
+        assert manifest["format"] == "pdf"
+
+
+# ---------------------------------------------------------------------------
+# Input validation
+# ---------------------------------------------------------------------------
+
+
+class TestBatchExportInputValidation:
+    """Invalid request bodies must return 422."""
+
+    def test_invalid_format_returns_422(self, client):
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"], "format": "xlsx"},
+            headers=_auth_header(),
+        )
+        assert response.status_code == 422
+
+    def test_empty_companies_list_returns_422(self, client):
+        response = client.post(
+            "/export/batch",
+            json={"companies": [], "format": "csv"},
+            headers=_auth_header(),
+        )
+        assert response.status_code == 422
+
+    def test_default_format_is_csv(self, client, mock_db):
+        """Omitting `format` should default to CSV."""
+        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
+
+        response = client.post(
+            "/export/batch",
+            json={"companies": ["NVIDIA"]},
+            headers=_auth_header(),
+        )
+
+        assert response.status_code == 200
+        zf = _open_zip(response.content)
+        manifest = json.loads(zf.read("manifest.json"))
+        assert manifest["format"] == "csv"
@@ -43,18 +43,12 @@ class TestCompanyNameValidation:

    # --- Too long ---

-    def test_over_128_chars_rejected(self, client, mock_analyzer):
-        """A company name longer than 128 characters should be rejected."""
-        long_name = "A" * 129
+    def test_over_100_chars_rejected(self, client, mock_analyzer):
+        """A company name longer than 100 characters should be rejected."""
+        long_name = "A" * 101
        response = client.get(f"/analyze/{long_name}")
        assert response.status_code == 422

-    def test_exactly_128_chars_accepted(self, client, mock_analyzer):
-        """A company name of exactly 128 characters should be accepted."""
-        max_name = "A" * 128
-        response = client.get(f"/analyze/{max_name}")
-        assert response.status_code != 422
-
    # --- Special characters ---

    @pytest.mark.parametrize(
@@ -101,7 +95,7 @@ class TestCompanyNameValidation:
            "3M",
            "21st Century Fox",
            "ab",  # minimum length
-            "A" * 128,  # maximum length
+            "A" * 100,  # maximum length
        ],
    )
    def test_valid_names_accepted(self, client, mock_analyzer, valid_name):
@@ -124,7 +118,7 @@ class TestCompanyNameValidation:
        """Batch endpoint should reject company names that are too long."""
        response = client.post(
            "/analyze/batch",
-            json={"companies": ["A" * 129]},
+            json={"companies": ["A" * 101]},
        )
        assert response.status_code == 422

@@ -161,30 +155,3 @@ class TestCompanyNameValidation:
            json={"companies": ["-nvidia"]},
        )
        assert response.status_code == 422
-
-    # --- GET /analyze/batch company_name filter validation ---
-
-    def test_batch_filter_special_chars_rejected(self, client, mock_analyzer):
-        """GET /analyze/batch company_name filter rejects disallowed chars."""
-        response = client.get("/analyze/batch", params={"company_name": "nvidia!"})
-        assert response.status_code == 422
-
-    def test_batch_filter_too_short_rejected(self, client, mock_analyzer):
-        """GET /analyze/batch company_name filter rejects names under 2 chars."""
-        response = client.get("/analyze/batch", params={"company_name": "X"})
-        assert response.status_code == 422
-
-    def test_batch_filter_too_long_rejected(self, client, mock_analyzer):
-        """GET /analyze/batch company_name filter rejects names over 128 chars."""
-        response = client.get("/analyze/batch", params={"company_name": "A" * 129})
-        assert response.status_code == 422
-
-    def test_batch_filter_valid_name_accepted(self, client, mock_analyzer):
-        """GET /analyze/batch company_name filter accepts a valid name."""
-        response = client.get("/analyze/batch", params={"company_name": "nvidia"})
-        assert response.status_code != 422
-
-    def test_batch_filter_omitted_accepted(self, client, mock_analyzer):
-        """GET /analyze/batch without company_name filter should work fine."""
-        response = client.get("/analyze/batch")
-        assert response.status_code != 422