Add POST /export/batch endpoint for multi-company ZIP download

Implements issue #1674: a new authenticated POST /export/batch endpoint that accepts a list of company names and an optional format (csv or pdf), compiles per-company exports into a ZIP archive using Python's zipfile module, and returns it as a streaming download.

Key changes:
- Extract `_fetch_company_rows`, `_build_company_csv`, `_build_company_pdf` helpers to eliminate duplication between the single-company endpoints and the new batch endpoint
- Refactor `export_company_csv` and `export_company_pdf` to delegate to the new helpers
- Add `BatchExportRequest` Pydantic model (companies list + format field)
- Add `POST /export/batch`, which iterates over companies, skips those with no data, writes per-company files into the ZIP, and always includes a `manifest.json` listing exported and skipped companies
- Response header: `Content-Disposition: attachment; filename=sparc-export-<date>.zip`
- 17 new tests covering: single company (CSV + PDF), multiple companies, all-missing, unauthenticated, invalid-token, manifest structure, input validation

Closes leeworks-agents/SPARC#1674

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 15:21:09 +00:00
parent 313800215c
commit 8f40109272
2 changed files with 521 additions and 69 deletions
@@ -675,27 +675,25 @@ async def get_analytics_trends(
 # ============== Export Endpoints ==============
-@app.get("/export/{company_name}", tags=["Export"])
+class BatchExportRequest(BaseModel):
-async def export_company_csv(
+    """Request model for batch ZIP export of analysis results."""
    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    _: UserResponse = Depends(get_current_user),
 ):
    """Export analysis results for a company as a CSV file.
-    Returns all stored analysis records for the given company, including
+    companies: list[CompanyName] = Field(
-    analysis type, model used, response text, and timestamp.
+        ..., min_length=1, max_length=50, description="List of company names to export"
    )
    format: str = Field(
        default="csv",
        pattern="^(csv|pdf)$",
        description="Export format: 'csv' or 'pdf'",
    )
    Args:
        company_name: Company name to export results for
-    Returns:
+def _fetch_company_rows(db, company_name: str) -> list:
-        CSV file download
+    """Fetch all non-cached analysis rows for *company_name* from the DB.
    Returns a list of tuples: (company_name, analysis_type, model, response, timestamp).
    Returns an empty list when no results exist.
    """
    import csv
    import io
    db = get_db_client()
    # Query all non-cached analysis results for this company
    with db.get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(
@@ -707,43 +705,24 @@ async def export_company_csv(
                """,
                (company_name,),
            )
-            rows = cur.fetchall()
+            return cur.fetchall()
-    if not rows:
+
-        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
+def _build_company_csv(rows) -> bytes:
    """Render *rows* as CSV bytes."""
    import csv
    import io
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["company_name", "analysis_type", "model", "analysis", "timestamp"])
    for row in rows:
        writer.writerow(row)
-
+    return output.getvalue().encode("utf-8")
    output.seek(0)
    safe_name = company_name.replace(" ", "_").lower()
    return StreamingResponse(
        iter([output.getvalue()]),
        media_type="text/csv",
        headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'},
    )
-@app.get("/export/{company_name}/pdf", tags=["Export"])
+def _build_company_pdf(rows, company_name: str) -> bytes:
-async def export_company_pdf(
+    """Render *rows* as PDF bytes using reportlab."""
    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    _: UserResponse = Depends(get_current_user),
 ):
    """Export analysis results for a company as a formatted PDF report.
    Returns all stored analysis records for the given company, including
    analysis type, model used, response text, and timestamp, formatted
    as a downloadable PDF document.
    Args:
        company_name: Company name to export results for
    Returns:
        PDF file download
    """
    import io
    from reportlab.lib import colors
@@ -758,23 +737,6 @@ async def export_company_pdf(
        TableStyle,
    )
    db = get_db_client()
    with db.get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT company_name, analysis_type, model, response, timestamp
                FROM llm_messages
                WHERE LOWER(company_name) = LOWER(%s) AND is_cached = FALSE
                ORDER BY timestamp DESC
                """,
                (company_name,),
            )
            rows = cur.fetchall()
    if not rows:
        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
@@ -817,13 +779,11 @@ async def export_company_pdf(
    elements = []
-    # Title and date
+    display_name = rows[0][0]
    display_name = rows[0][0]  # Use the casing from the database
    analysis_date = datetime.now().strftime("%Y-%m-%d")
    elements.append(Paragraph(f"SPARC Analysis Report: {display_name}", title_style))
    elements.append(Paragraph(f"Generated on {analysis_date}", subtitle_style))
    # Summary table
    summary_data = [
        ["Total Analyses", str(len(rows))],
        ["Analysis Types", ", ".join(sorted(set(r[1] for r in rows)))],
@@ -845,7 +805,6 @@ async def export_company_pdf(
    elements.append(summary_table)
    elements.append(Spacer(1, 16))
    # Individual analysis sections
    for i, row in enumerate(rows, 1):
        _, analysis_type, model, response, timestamp = row
        ts_str = timestamp.strftime("%Y-%m-%d %H:%M:%S") if hasattr(timestamp, "strftime") else str(timestamp)
@@ -857,13 +816,11 @@ async def export_company_pdf(
            Paragraph(f"<i>Performed: {ts_str}</i>", body_style)
        )
        # Wrap long response text into paragraphs, escaping XML special chars
        safe_response = (
            response.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )
        # Split into manageable paragraphs to avoid overflow
        for line in safe_response.split("\n"):
            if line.strip():
                elements.append(Paragraph(line, body_style))
@@ -874,11 +831,133 @@ async def export_company_pdf(
    doc.build(elements)
    buffer.seek(0)
    return buffer.getvalue()
@app.post("/export/batch", tags=["Export"])
async def export_batch_zip(
    request: BatchExportRequest,
    _: UserResponse = Depends(get_current_user),
):
    """Export analysis results for multiple companies as a ZIP archive.

    For each distinct company in the request, fetches all stored analysis
    records and adds a per-company file (CSV or PDF) to the archive.
    Companies with no stored results are skipped; a ``manifest.json`` inside
    the ZIP lists both the exported and skipped companies.

    A company name repeated verbatim in the request is processed once. If
    two different names map to the same archive member name (the member name
    is derived from the lowercased company name), the later file gets a
    numeric suffix so the archive never contains duplicate entries.

    Args:
        request: List of company names and desired export format ('csv' or 'pdf')

    Returns:
        ZIP archive download containing one file per found company plus a manifest
    """
    import io
    import json
    import zipfile

    db = get_db_client()
    export_date = datetime.now().strftime("%Y-%m-%d")
    fmt = request.format

    exported: list[str] = []
    skipped: list[str] = []
    used_names: set[str] = set()

    # Drop verbatim repeats while keeping the caller's order; they would
    # otherwise produce identical files under identical member names.
    companies = list(dict.fromkeys(request.companies))

    # NOTE(review): the fetch and render helpers appear to be synchronous;
    # confirm whether they should be offloaded from this async handler.
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        for company_name in companies:
            rows = _fetch_company_rows(db, company_name)
            if not rows:
                skipped.append(company_name)
                continue

            safe_name = company_name.replace(" ", "_").lower()
            if fmt == "pdf":
                file_bytes = _build_company_pdf(rows, company_name)
                stem, ext = f"{safe_name}-analysis-{export_date}", "pdf"
            else:
                file_bytes = _build_company_csv(rows)
                stem, ext = f"sparc_{safe_name}_export", "csv"

            # Lowercasing can make distinct names collide (e.g. "NVIDIA" and
            # "nvidia"); disambiguate instead of writing a duplicate member.
            filename = f"{stem}.{ext}"
            suffix = 2
            while filename in used_names:
                filename = f"{stem}-{suffix}.{ext}"
                suffix += 1
            used_names.add(filename)

            zf.writestr(filename, file_bytes)
            exported.append(company_name)

        # Always include a manifest, even when nothing was exported.
        manifest = {
            "export_date": export_date,
            "format": fmt,
            "exported": exported,
            "skipped": skipped,
        }
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))

    zip_filename = f"sparc-export-{export_date}.zip"
    return StreamingResponse(
        iter([zip_buffer.getvalue()]),
        media_type="application/zip",
        headers={"Content-Disposition": f'attachment; filename="{zip_filename}"'},
    )
@app.get("/export/{company_name}", tags=["Export"])
async def export_company_csv(
    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    _: UserResponse = Depends(get_current_user),
):
    """Download every stored analysis record for one company as CSV.

    Args:
        company_name: Company name to export results for

    Returns:
        CSV file download

    Raises:
        HTTPException: 404 when no analysis rows exist for the company.
    """
    records = _fetch_company_rows(get_db_client(), company_name)
    if not records:
        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")

    # Spaces become underscores and the name is lowercased for the file name.
    slug = company_name.replace(" ", "_").lower()
    payload = _build_company_csv(records)
    disposition = f'attachment; filename="sparc_{slug}_export.csv"'
    return StreamingResponse(
        iter([payload]),
        media_type="text/csv",
        headers={"Content-Disposition": disposition},
    )
@app.get("/export/{company_name}/pdf", tags=["Export"])
 async def export_company_pdf(
    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    _: UserResponse = Depends(get_current_user),
 ):
    """Export analysis results for a company as a formatted PDF report.
    Returns all stored analysis records for the given company, including
    analysis type, model used, response text, and timestamp, formatted
    as a downloadable PDF document.
    Args:
        company_name: Company name to export results for
    Returns:
        PDF file download
    Raises:
        HTTPException: 404 when no analysis rows exist for the company
    """
    db = get_db_client()
    rows = _fetch_company_rows(db, company_name)
    # Nothing stored for this company: report 404 rather than an empty PDF.
    if not rows:
        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
    # Download name: lowercased company name with spaces as underscores, plus today's date.
    safe_name = company_name.replace(" ", "_").lower()
    analysis_date = datetime.now().strftime("%Y-%m-%d")
    filename = f"{safe_name}-analysis-{analysis_date}.pdf"
    return StreamingResponse(
-        iter([buffer.getvalue()]),
+        iter([_build_company_pdf(rows, company_name)]),
        media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
@@ -0,0 +1,373 @@
 """Tests for POST /export/batch endpoint (issue #1674).
 Covers:
 - Single company export (CSV + PDF)
 - Multiple company export
 - All-missing companies (every requested company is skipped)
 - Unauthenticated / invalid-token requests
 - Manifest content validation
 - Invalid format rejection
 """
 import io
 import json
 import zipfile
 from datetime import datetime, timezone
 from unittest.mock import MagicMock, patch
 import pytest
 from fastapi.testclient import TestClient
 from SPARC.api import app
 from SPARC.auth import create_access_token
@pytest.fixture
def client():
    """Provide a ``TestClient`` bound to the FastAPI ``app`` under test."""
    test_client = TestClient(app)
    return test_client
@pytest.fixture(autouse=True)
def mock_db():
    """Patch the database client used by the API and auth modules.

    Yields the mock database. Its ``_mock_cursor`` attribute is the cursor
    handed out by ``get_conn().cursor()``, so tests can set ``fetchall``
    results on it.
    """
    database = MagicMock()
    connection = MagicMock()
    cursor = MagicMock()

    # Auth lookups always find this user.
    database.get_user_by_id.return_value = {
        "id": 1,
        "email": "user@test.com",
        "role": "user",
        "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
    }

    # Wire up ``with db.get_conn() as conn: with conn.cursor() as cur:``.
    cursor_ctx = connection.cursor.return_value
    cursor_ctx.__enter__ = MagicMock(return_value=cursor)
    cursor_ctx.__exit__ = MagicMock(return_value=False)
    conn_ctx = database.get_conn.return_value
    conn_ctx.__enter__ = MagicMock(return_value=connection)
    conn_ctx.__exit__ = MagicMock(return_value=False)

    # Expose the cursor so individual tests can override its results.
    database._mock_cursor = cursor

    api_patch = patch("SPARC.api.get_db_client", return_value=database)
    auth_patch = patch("SPARC.auth.get_db_client", return_value=database)
    with api_patch, auth_patch:
        yield database
 def _auth_header():
    """Return an ``Authorization`` header holding a bearer token for the test user."""
    bearer = create_access_token(1, "user@test.com", "user")
    return dict(Authorization=f"Bearer {bearer}")
 def _rows_for(company_name: str):
    """Return a single sample row for the given company."""
    return [
        (
            company_name,
            "company_analysis",
            "anthropic/claude-3.5-sonnet",
            f"Strong patent portfolio for {company_name}.",
            datetime(2025, 6, 15, 10, 30, 0),
        )
    ]
 def _open_zip(content: bytes) -> zipfile.ZipFile:
    """Helper: wrap response bytes as a ZipFile."""
    return zipfile.ZipFile(io.BytesIO(content))
 # ---------------------------------------------------------------------------
 # Authentication
 # ---------------------------------------------------------------------------
 class TestBatchExportAuth:
    """Requests without valid credentials must be answered with 401."""

    # Well-formed body, so a rejection can only come from authentication.
    PAYLOAD = {"companies": ["NVIDIA"], "format": "csv"}

    def test_unauthenticated_returns_401(self, client):
        # No Authorization header at all.
        result = client.post("/export/batch", json=self.PAYLOAD)
        assert result.status_code == 401

    def test_invalid_token_returns_401(self, client):
        # Header present, but the bearer token is not a valid token.
        bogus = {"Authorization": "Bearer totally.invalid.token"}
        result = client.post("/export/batch", json=self.PAYLOAD, headers=bogus)
        assert result.status_code == 401
 # ---------------------------------------------------------------------------
 # Single company
 # ---------------------------------------------------------------------------
 class TestBatchExportSingleCompany:
    """Batch export requested for exactly one company."""

    @staticmethod
    def _export_nvidia(client, mock_db, fmt):
        """Stub one NVIDIA row on the cursor and POST a batch export in *fmt*."""
        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
        return client.post(
            "/export/batch",
            json={"companies": ["NVIDIA"], "format": fmt},
            headers=_auth_header(),
        )

    def test_single_company_csv_returns_zip(self, client, mock_db):
        resp = self._export_nvidia(client, mock_db, "csv")
        assert resp.status_code == 200
        assert resp.headers["content-type"] == "application/zip"
        assert "attachment" in resp.headers["content-disposition"]
        assert "sparc-export-" in resp.headers["content-disposition"]
        assert resp.headers["content-disposition"].endswith('.zip"')

    def test_single_company_csv_zip_contains_csv_file(self, client, mock_db):
        resp = self._export_nvidia(client, mock_db, "csv")
        archive = _open_zip(resp.content)
        csv_members = [member for member in archive.namelist() if member.endswith(".csv")]
        assert len(csv_members) == 1
        assert "nvidia" in csv_members[0]

    def test_single_company_csv_content_is_valid_csv(self, client, mock_db):
        resp = self._export_nvidia(client, mock_db, "csv")
        archive = _open_zip(resp.content)
        csv_member = [member for member in archive.namelist() if member.endswith(".csv")][0]
        text = archive.read(csv_member).decode("utf-8")
        header, first_record = text.strip().split("\n")[:2]
        assert header.strip() == "company_name,analysis_type,model,analysis,timestamp"
        assert "NVIDIA" in first_record

    def test_single_company_pdf_zip_contains_pdf_file(self, client, mock_db):
        resp = self._export_nvidia(client, mock_db, "pdf")
        assert resp.status_code == 200
        archive = _open_zip(resp.content)
        pdf_members = [member for member in archive.namelist() if member.endswith(".pdf")]
        assert len(pdf_members) == 1
        # A real PDF starts with the "%PDF" magic bytes.
        assert archive.read(pdf_members[0])[:4] == b"%PDF"
 # ---------------------------------------------------------------------------
 # Multiple companies
 # ---------------------------------------------------------------------------
 class TestBatchExportMultipleCompanies:
    """Batch export requested for several companies at once."""

    @staticmethod
    def _post_csv(client, names):
        """POST an authenticated CSV batch export for *names*."""
        return client.post(
            "/export/batch",
            json={"companies": names, "format": "csv"},
            headers=_auth_header(),
        )

    def test_multiple_companies_each_gets_a_file(self, client, mock_db):
        names = ["NVIDIA", "Intel", "AMD"]
        # One fetchall result per company, in request order.
        mock_db._mock_cursor.fetchall.side_effect = [_rows_for(name) for name in names]
        resp = self._post_csv(client, names)
        assert resp.status_code == 200
        archive = _open_zip(resp.content)
        csv_members = [member for member in archive.namelist() if member.endswith(".csv")]
        assert len(csv_members) == 3

    def test_multiple_companies_manifest_lists_all_exported(self, client, mock_db):
        names = ["NVIDIA", "Intel"]
        mock_db._mock_cursor.fetchall.side_effect = [_rows_for(name) for name in names]
        resp = self._post_csv(client, names)
        archive = _open_zip(resp.content)
        manifest = json.loads(archive.read("manifest.json"))
        assert set(manifest["exported"]) == {"NVIDIA", "Intel"}
        assert manifest["skipped"] == []
        assert manifest["format"] == "csv"

    def test_partial_missing_companies_skipped(self, client, mock_db):
        """A company without data is skipped while the others are exported."""
        no_data = []  # fetchall result for "UnknownCo"
        mock_db._mock_cursor.fetchall.side_effect = [_rows_for("NVIDIA"), no_data]
        resp = self._post_csv(client, ["NVIDIA", "UnknownCo"])
        assert resp.status_code == 200
        archive = _open_zip(resp.content)
        manifest = json.loads(archive.read("manifest.json"))
        assert manifest["exported"] == ["NVIDIA"]
        assert manifest["skipped"] == ["UnknownCo"]
        csv_members = [member for member in archive.namelist() if member.endswith(".csv")]
        assert len(csv_members) == 1
 # ---------------------------------------------------------------------------
 # All-missing companies
 # ---------------------------------------------------------------------------
 class TestBatchExportAllMissing:
    """If no requested company has data, the endpoint still answers 200 and
    the ZIP holds only the manifest, with every company listed as skipped."""

    @staticmethod
    def _post_without_data(client, mock_db, names):
        """Make every DB lookup return no rows, then POST a CSV export for *names*."""
        mock_db._mock_cursor.fetchall.return_value = []
        return client.post(
            "/export/batch",
            json={"companies": names, "format": "csv"},
            headers=_auth_header(),
        )

    def test_all_missing_returns_200_with_manifest_only(self, client, mock_db):
        resp = self._post_without_data(client, mock_db, ["GhostCo", "PhantomInc"])
        assert resp.status_code == 200
        archive = _open_zip(resp.content)
        assert "manifest.json" in archive.namelist()
        manifest = json.loads(archive.read("manifest.json"))
        assert manifest["exported"] == []
        assert set(manifest["skipped"]) == {"GhostCo", "PhantomInc"}

    def test_all_missing_zip_has_no_data_files(self, client, mock_db):
        resp = self._post_without_data(client, mock_db, ["GhostCo"])
        archive = _open_zip(resp.content)
        non_manifest = [member for member in archive.namelist() if member != "manifest.json"]
        assert non_manifest == []
 # ---------------------------------------------------------------------------
 # Manifest validation
 # ---------------------------------------------------------------------------
 class TestBatchExportManifest:
    """Every ZIP must carry a well-formed ``manifest.json``."""

    @staticmethod
    def _archive_for(client, mock_db, fmt):
        """Export NVIDIA in *fmt* and return the response body opened as a ZIP."""
        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
        resp = client.post(
            "/export/batch",
            json={"companies": ["NVIDIA"], "format": fmt},
            headers=_auth_header(),
        )
        return _open_zip(resp.content)

    def test_manifest_always_present(self, client, mock_db):
        archive = self._archive_for(client, mock_db, "csv")
        assert "manifest.json" in archive.namelist()

    def test_manifest_contains_required_keys(self, client, mock_db):
        archive = self._archive_for(client, mock_db, "csv")
        manifest = json.loads(archive.read("manifest.json"))
        for required_key in ("export_date", "format", "exported", "skipped"):
            assert required_key in manifest

    def test_manifest_format_field_matches_request(self, client, mock_db):
        archive = self._archive_for(client, mock_db, "pdf")
        manifest = json.loads(archive.read("manifest.json"))
        assert manifest["format"] == "pdf"
 # ---------------------------------------------------------------------------
 # Input validation
 # ---------------------------------------------------------------------------
 class TestBatchExportInputValidation:
    """Malformed request bodies are rejected with 422; defaults are applied."""

    @staticmethod
    def _post(client, body):
        """POST *body* to the batch endpoint with a valid auth header."""
        return client.post("/export/batch", json=body, headers=_auth_header())

    def test_invalid_format_returns_422(self, client):
        # "xlsx" is not one of the accepted formats.
        resp = self._post(client, {"companies": ["NVIDIA"], "format": "xlsx"})
        assert resp.status_code == 422

    def test_empty_companies_list_returns_422(self, client):
        resp = self._post(client, {"companies": [], "format": "csv"})
        assert resp.status_code == 422

    def test_default_format_is_csv(self, client, mock_db):
        """Leaving out `format` must fall back to CSV."""
        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
        resp = self._post(client, {"companies": ["NVIDIA"]})
        assert resp.status_code == 200
        archive = _open_zip(resp.content)
        manifest = json.loads(archive.read("manifest.json"))
        assert manifest["format"] == "csv"