diff --git a/SPARC/api.py b/SPARC/api.py index 1b29d38..f5d87b8 100644 --- a/SPARC/api.py +++ b/SPARC/api.py @@ -675,27 +675,25 @@ async def get_analytics_trends( # ============== Export Endpoints ============== -@app.get("/export/{company_name}", tags=["Export"]) -async def export_company_csv( - company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], - _: UserResponse = Depends(get_current_user), -): - """Export analysis results for a company as a CSV file. +class BatchExportRequest(BaseModel): + """Request model for batch ZIP export of analysis results.""" - Returns all stored analysis records for the given company, including - analysis type, model used, response text, and timestamp. + companies: list[CompanyName] = Field( + ..., min_length=1, max_length=50, description="List of company names to export" + ) + format: str = Field( + default="csv", + pattern="^(csv|pdf)$", + description="Export format: 'csv' or 'pdf'", + ) - Args: - company_name: Company name to export results for - Returns: - CSV file download +def _fetch_company_rows(db, company_name: str) -> list: + """Fetch all non-cached analysis rows for *company_name* from the DB. + + Returns a list of tuples: (company_name, analysis_type, model, response, timestamp). + Returns an empty list when no results exist. """ - import csv - import io - - db = get_db_client() - # Query all non-cached analysis results for this company with db.get_conn() as conn: with conn.cursor() as cur: cur.execute( @@ -707,43 +705,24 @@ async def export_company_csv( """, (company_name,), ) - rows = cur.fetchall() + return cur.fetchall() - if not rows: - raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'") + +def _build_company_csv(rows) -> bytes: + """Render *rows* as CSV bytes.""" + import csv + import io output = io.StringIO() writer = csv.writer(output) writer.writerow(["company_name", "analysis_type", "model", "analysis", "timestamp"]) for row in rows: writer.writerow(row) - - output.seek(0) - safe_name = company_name.replace(" ", "_").lower() - return StreamingResponse( - iter([output.getvalue()]), - media_type="text/csv", - headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'}, - ) + return output.getvalue().encode("utf-8") -@app.get("/export/{company_name}/pdf", tags=["Export"]) -async def export_company_pdf( - company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], - _: UserResponse = Depends(get_current_user), -): - """Export analysis results for a company as a formatted PDF report. - - Returns all stored analysis records for the given company, including - analysis type, model used, response text, and timestamp, formatted - as a downloadable PDF document. - - Args: - company_name: Company name to export results for - - Returns: - PDF file download - """ +def _build_company_pdf(rows, company_name: str) -> bytes: + """Render *rows* as PDF bytes using reportlab.""" import io from reportlab.lib import colors @@ -758,23 +737,6 @@ async def export_company_pdf( TableStyle, ) - db = get_db_client() - with db.get_conn() as conn: - with conn.cursor() as cur: - cur.execute( - """ - SELECT company_name, analysis_type, model, response, timestamp - FROM llm_messages - WHERE LOWER(company_name) = LOWER(%s) AND is_cached = FALSE - ORDER BY timestamp DESC - """, - (company_name,), - ) - rows = cur.fetchall() - - if not rows: - raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'") - buffer = io.BytesIO() doc = SimpleDocTemplate( buffer, @@ -817,13 +779,11 @@ async def export_company_pdf( elements = [] - # Title and date - display_name = rows[0][0] # Use the casing from the database + display_name = rows[0][0] analysis_date = datetime.now().strftime("%Y-%m-%d") elements.append(Paragraph(f"SPARC Analysis Report: {display_name}", title_style)) elements.append(Paragraph(f"Generated on {analysis_date}", subtitle_style)) - # Summary table summary_data = [ ["Total Analyses", str(len(rows))], ["Analysis Types", ", ".join(sorted(set(r[1] for r in rows)))], @@ -845,7 +805,6 @@ async def export_company_pdf( elements.append(summary_table) elements.append(Spacer(1, 16)) - # Individual analysis sections for i, row in enumerate(rows, 1): _, analysis_type, model, response, timestamp = row ts_str = timestamp.strftime("%Y-%m-%d %H:%M:%S") if hasattr(timestamp, "strftime") else str(timestamp) @@ -857,13 +816,11 @@ async def export_company_pdf( Paragraph(f"Performed: {ts_str}", body_style) ) - # Wrap long response text into paragraphs, escaping XML special chars safe_response = ( response.replace("&", "&") .replace("<", "<") .replace(">", ">") ) - # Split into manageable paragraphs to avoid overflow for line in safe_response.split("\n"): if line.strip(): elements.append(Paragraph(line, body_style)) @@ -874,11 +831,133 @@ async def export_company_pdf( doc.build(elements) buffer.seek(0) + return buffer.getvalue() + + +@app.post("/export/batch", tags=["Export"]) +async def export_batch_zip( + request: BatchExportRequest, + _: UserResponse = Depends(get_current_user), +): + """Export analysis results for multiple companies as a ZIP archive. + + For each company in the request, fetches all stored analysis records and + adds a per-company file (CSV or PDF) to the archive. Companies with no + stored results are skipped; a ``manifest.json`` inside the ZIP lists both + the exported and skipped companies. + + Args: + request: List of company names and desired export format ('csv' or 'pdf') + + Returns: + ZIP archive download containing one file per found company plus a manifest + """ + import io + import json + import zipfile + + db = get_db_client() + export_date = datetime.now().strftime("%Y-%m-%d") + fmt = request.format + + exported: list[str] = [] + skipped: list[str] = [] + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: + for company_name in request.companies: + rows = _fetch_company_rows(db, company_name) + if not rows: + skipped.append(company_name) + continue + + safe_name = company_name.replace(" ", "_").lower() + if fmt == "pdf": + file_bytes = _build_company_pdf(rows, company_name) + filename = f"{safe_name}-analysis-{export_date}.pdf" + else: + file_bytes = _build_company_csv(rows) + filename = f"sparc_{safe_name}_export.csv" + + zf.writestr(filename, file_bytes) + exported.append(company_name) + + # Always include a manifest + manifest = { + "export_date": export_date, + "format": fmt, + "exported": exported, + "skipped": skipped, + } + zf.writestr("manifest.json", json.dumps(manifest, indent=2)) + + zip_buffer.seek(0) + zip_filename = f"sparc-export-{export_date}.zip" + return StreamingResponse( + iter([zip_buffer.getvalue()]), + media_type="application/zip", + headers={"Content-Disposition": f'attachment; filename="{zip_filename}"'}, + ) + + +@app.get("/export/{company_name}", tags=["Export"]) +async def export_company_csv( + company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], + _: UserResponse = Depends(get_current_user), +): + """Export analysis results for a company as a CSV file. + + Returns all stored analysis records for the given company, including + analysis type, model used, response text, and timestamp. + + Args: + company_name: Company name to export results for + + Returns: + CSV file download + """ + db = get_db_client() + rows = _fetch_company_rows(db, company_name) + + if not rows: + raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'") safe_name = company_name.replace(" ", "_").lower() + return StreamingResponse( + iter([_build_company_csv(rows)]), + media_type="text/csv", + headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'}, + ) + + +@app.get("/export/{company_name}/pdf", tags=["Export"]) +async def export_company_pdf( + company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")], + _: UserResponse = Depends(get_current_user), +): + """Export analysis results for a company as a formatted PDF report. + + Returns all stored analysis records for the given company, including + analysis type, model used, response text, and timestamp, formatted + as a downloadable PDF document. + + Args: + company_name: Company name to export results for + + Returns: + PDF file download + """ + db = get_db_client() + rows = _fetch_company_rows(db, company_name) + + if not rows: + raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'") + + safe_name = company_name.replace(" ", "_").lower() + analysis_date = datetime.now().strftime("%Y-%m-%d") filename = f"{safe_name}-analysis-{analysis_date}.pdf" return StreamingResponse( - iter([buffer.getvalue()]), + iter([_build_company_pdf(rows, company_name)]), media_type="application/pdf", headers={"Content-Disposition": f'attachment; filename="{filename}"'}, ) diff --git a/tests/test_batch_export.py b/tests/test_batch_export.py new file mode 100644 index 0000000..daa1859 --- /dev/null +++ b/tests/test_batch_export.py @@ -0,0 +1,373 @@ +"""Tests for POST /export/batch endpoint (issue #1674). + +Covers: +- Single company export (CSV + PDF) +- Multiple company export +- All-missing companies (every requested company is skipped) +- Unauthenticated / invalid-token requests +- Manifest content validation +- Invalid format rejection +""" + +import io +import json +import zipfile +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + +from SPARC.api import app +from SPARC.auth import create_access_token + + +@pytest.fixture +def client(): + """Create a FastAPI test client.""" + return TestClient(app) + + +@pytest.fixture(autouse=True) +def mock_db(): + """Mock database client for all tests in this module.""" + db = MagicMock() + + # Auth: user always exists + db.get_user_by_id.return_value = { + "id": 1, + "email": "user@test.com", + "role": "user", + "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc), + } + + # Default cursor mock (overridden per-test via side_effect or return_value) + mock_cursor = MagicMock() + mock_conn = MagicMock() + mock_conn.cursor.return_value.__enter__ = MagicMock(return_value=mock_cursor) + mock_conn.cursor.return_value.__exit__ = MagicMock(return_value=False) + db.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + db.get_conn.return_value.__exit__ = MagicMock(return_value=False) + db._mock_cursor = mock_cursor + + with patch("SPARC.api.get_db_client", return_value=db), \ + patch("SPARC.auth.get_db_client", return_value=db): + yield db + + +def _auth_header(): + token = create_access_token(1, "user@test.com", "user") + return {"Authorization": f"Bearer {token}"} + + +def _rows_for(company_name: str): + """Return a single sample row for the given company.""" + return [ + ( + company_name, + "company_analysis", + "anthropic/claude-3.5-sonnet", + f"Strong patent portfolio for {company_name}.", + datetime(2025, 6, 15, 10, 30, 0), + ) + ] + + +def _open_zip(content: bytes) -> zipfile.ZipFile: + """Helper: wrap response bytes as a ZipFile.""" + return zipfile.ZipFile(io.BytesIO(content)) + + +# --------------------------------------------------------------------------- +# Authentication +# --------------------------------------------------------------------------- + + +class TestBatchExportAuth: + """Unauthenticated and invalid-token requests must be rejected.""" + + def test_unauthenticated_returns_401(self, client): + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + ) + assert response.status_code == 401 + + def test_invalid_token_returns_401(self, client): + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + headers={"Authorization": "Bearer totally.invalid.token"}, + ) + assert response.status_code == 401 + + +# --------------------------------------------------------------------------- +# Single company +# --------------------------------------------------------------------------- + + +class TestBatchExportSingleCompany: + """POST /export/batch with a single company name.""" + + def test_single_company_csv_returns_zip(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + headers=_auth_header(), + ) + + assert response.status_code == 200 + assert response.headers["content-type"] == "application/zip" + assert "attachment" in response.headers["content-disposition"] + assert "sparc-export-" in response.headers["content-disposition"] + assert response.headers["content-disposition"].endswith('.zip"') + + def test_single_company_csv_zip_contains_csv_file(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + names = zf.namelist() + csv_files = [n for n in names if n.endswith(".csv")] + assert len(csv_files) == 1 + assert "nvidia" in csv_files[0] + + def test_single_company_csv_content_is_valid_csv(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + csv_name = [n for n in zf.namelist() if n.endswith(".csv")][0] + csv_text = zf.read(csv_name).decode("utf-8") + lines = csv_text.strip().split("\n") + assert lines[0].strip() == "company_name,analysis_type,model,analysis,timestamp" + assert "NVIDIA" in lines[1] + + def test_single_company_pdf_zip_contains_pdf_file(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "pdf"}, + headers=_auth_header(), + ) + + assert response.status_code == 200 + zf = _open_zip(response.content) + pdf_files = [n for n in zf.namelist() if n.endswith(".pdf")] + assert len(pdf_files) == 1 + # Verify it is actually a PDF (starts with %PDF) + pdf_bytes = zf.read(pdf_files[0]) + assert pdf_bytes[:4] == b"%PDF" + + +# --------------------------------------------------------------------------- +# Multiple companies +# --------------------------------------------------------------------------- + + +class TestBatchExportMultipleCompanies: + """POST /export/batch with several companies.""" + + def test_multiple_companies_each_gets_a_file(self, client, mock_db): + companies = ["NVIDIA", "Intel", "AMD"] + mock_db._mock_cursor.fetchall.side_effect = [ + _rows_for("NVIDIA"), + _rows_for("Intel"), + _rows_for("AMD"), + ] + + response = client.post( + "/export/batch", + json={"companies": companies, "format": "csv"}, + headers=_auth_header(), + ) + + assert response.status_code == 200 + zf = _open_zip(response.content) + csv_files = [n for n in zf.namelist() if n.endswith(".csv")] + assert len(csv_files) == 3 + + def test_multiple_companies_manifest_lists_all_exported(self, client, mock_db): + companies = ["NVIDIA", "Intel"] + mock_db._mock_cursor.fetchall.side_effect = [ + _rows_for("NVIDIA"), + _rows_for("Intel"), + ] + + response = client.post( + "/export/batch", + json={"companies": companies, "format": "csv"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + manifest = json.loads(zf.read("manifest.json")) + assert set(manifest["exported"]) == {"NVIDIA", "Intel"} + assert manifest["skipped"] == [] + assert manifest["format"] == "csv" + + def test_partial_missing_companies_skipped(self, client, mock_db): + """Companies with no data are skipped; others are exported.""" + mock_db._mock_cursor.fetchall.side_effect = [ + _rows_for("NVIDIA"), + [], # no data for "UnknownCo" + ] + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA", "UnknownCo"], "format": "csv"}, + headers=_auth_header(), + ) + + assert response.status_code == 200 + zf = _open_zip(response.content) + manifest = json.loads(zf.read("manifest.json")) + assert manifest["exported"] == ["NVIDIA"] + assert manifest["skipped"] == ["UnknownCo"] + + csv_files = [n for n in zf.namelist() if n.endswith(".csv")] + assert len(csv_files) == 1 + + +# --------------------------------------------------------------------------- +# All-missing companies +# --------------------------------------------------------------------------- + + +class TestBatchExportAllMissing: + """When every requested company has no data, the ZIP still returns 200 + with only a manifest (no per-company files, all listed in skipped).""" + + def test_all_missing_returns_200_with_manifest_only(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = [] + + response = client.post( + "/export/batch", + json={"companies": ["GhostCo", "PhantomInc"], "format": "csv"}, + headers=_auth_header(), + ) + + assert response.status_code == 200 + zf = _open_zip(response.content) + assert "manifest.json" in zf.namelist() + + manifest = json.loads(zf.read("manifest.json")) + assert manifest["exported"] == [] + assert set(manifest["skipped"]) == {"GhostCo", "PhantomInc"} + + def test_all_missing_zip_has_no_data_files(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = [] + + response = client.post( + "/export/batch", + json={"companies": ["GhostCo"], "format": "csv"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + data_files = [n for n in zf.namelist() if n != "manifest.json"] + assert data_files == [] + + +# --------------------------------------------------------------------------- +# Manifest validation +# --------------------------------------------------------------------------- + + +class TestBatchExportManifest: + """The manifest.json inside every ZIP must be well-formed.""" + + def test_manifest_always_present(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + assert "manifest.json" in zf.namelist() + + def test_manifest_contains_required_keys(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "csv"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + manifest = json.loads(zf.read("manifest.json")) + assert "export_date" in manifest + assert "format" in manifest + assert "exported" in manifest + assert "skipped" in manifest + + def test_manifest_format_field_matches_request(self, client, mock_db): + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "pdf"}, + headers=_auth_header(), + ) + + zf = _open_zip(response.content) + manifest = json.loads(zf.read("manifest.json")) + assert manifest["format"] == "pdf" + + +# --------------------------------------------------------------------------- +# Input validation +# --------------------------------------------------------------------------- + + +class TestBatchExportInputValidation: + """Invalid request bodies must return 422.""" + + def test_invalid_format_returns_422(self, client): + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"], "format": "xlsx"}, + headers=_auth_header(), + ) + assert response.status_code == 422 + + def test_empty_companies_list_returns_422(self, client): + response = client.post( + "/export/batch", + json={"companies": [], "format": "csv"}, + headers=_auth_header(), + ) + assert response.status_code == 422 + + def test_default_format_is_csv(self, client, mock_db): + """Omitting `format` should default to CSV.""" + mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA") + + response = client.post( + "/export/batch", + json={"companies": ["NVIDIA"]}, + headers=_auth_header(), + ) + + assert response.status_code == 200 + zf = _open_zip(response.content) + manifest = json.loads(zf.read("manifest.json")) + assert manifest["format"] == "csv"