fix: enforce max_length=128 and validate GET /analyze/batch filter

Closes leeworks-agents/SPARC#1685

- Increase the CompanyName max_length from 100 to 128 everywhere (Pydantic type, Path constraints, and the inline Query on analyze/patent).
- Add a reusable _COMPANY_NAME_FILTER_QUERY Query annotation and apply it to the optional company_name filter on GET /analyze/batch, so that the filter is validated with the same rules as all other endpoints.
- Update tests: rename test_over_100_chars_rejected → test_over_128_chars_rejected, add test_exactly_128_chars_accepted at the new boundary, fix the batch too-long test to use 129 chars, update the valid-name parametrize case to use "A" * 128, and add five new tests covering GET /analyze/batch filter validation (special chars, too short, too long, valid, omitted).

Note: the diff below also removes the POST /export/batch endpoint together with BatchExportRequest, the _fetch_company_rows / _build_company_csv / _build_company_pdf helpers, and the test module for that endpoint; the per-company CSV and PDF export endpoints now perform the database query and rendering inline.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 15:18:09 +00:00
3 changed files with 125 additions and 532 deletions
@@ -36,16 +36,28 @@ from SPARC.auth import (
 )
 from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult

-# Validated company name type: 2-100 chars, alphanumeric + spaces/hyphens/ampersands/periods only.
+# Validated company name type: 2-128 chars, alphanumeric + spaces/hyphens/ampersands/periods only.
 CompanyName = Annotated[
    str,
    StringConstraints(
        min_length=2,
-        max_length=100,
+        max_length=128,
        pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$",
    ),
 ]

+# Reusable Query constraint for optional company_name filter parameters.
+_COMPANY_NAME_FILTER_QUERY = Query(
+    default=None,
+    min_length=2,
+    max_length=128,
+    pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$",
+    description=(
+        "Company name filter (2-128 chars; alphanumeric, spaces, hyphens, "
+        "periods, and ampersands only)"
+    ),
+)
+

 # Pydantic models for API
 class CompanyAnalysisResponse(BaseModel):
@@ -489,7 +501,7 @@ async def add_tracked_company(

@app.delete("/admin/tracked/{company_name}", tags=["Admin"])
 async def remove_tracked_company(
-    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    _: UserResponse = Depends(get_current_admin),
 ):
    """Remove a company from the tracked list (admin only)."""
@@ -675,25 +687,27 @@ async def get_analytics_trends(
 # ============== Export Endpoints ==============


-class BatchExportRequest(BaseModel):
-    """Request model for batch ZIP export of analysis results."""
+@app.get("/export/{company_name}", tags=["Export"])
+async def export_company_csv(
+    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    _: UserResponse = Depends(get_current_user),
+):
+    """Export analysis results for a company as a CSV file.

-    companies: list[CompanyName] = Field(
-        ..., min_length=1, max_length=50, description="List of company names to export"
-    )
-    format: str = Field(
-        default="csv",
-        pattern="^(csv|pdf)$",
-        description="Export format: 'csv' or 'pdf'",
-    )
+    Returns all stored analysis records for the given company, including
+    analysis type, model used, response text, and timestamp.

+    Args:
+        company_name: Company name to export results for

-def _fetch_company_rows(db, company_name: str) -> list:
-    """Fetch all non-cached analysis rows for *company_name* from the DB.
-
-    Returns a list of tuples: (company_name, analysis_type, model, response, timestamp).
-    Returns an empty list when no results exist.
+    Returns:
+        CSV file download
    """
+    import csv
+    import io
+
+    db = get_db_client()
+    # Query all non-cached analysis results for this company
    with db.get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(
@@ -705,24 +719,43 @@ def _fetch_company_rows(db, company_name: str) -> list:
                """,
                (company_name,),
            )
-            return cur.fetchall()
+            rows = cur.fetchall()

-
-def _build_company_csv(rows) -> bytes:
-    """Render *rows* as CSV bytes."""
-    import csv
-    import io
+    if not rows:
+        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["company_name", "analysis_type", "model", "analysis", "timestamp"])
    for row in rows:
        writer.writerow(row)
-    return output.getvalue().encode("utf-8")
+
+    output.seek(0)
+    safe_name = company_name.replace(" ", "_").lower()
+    return StreamingResponse(
+        iter([output.getvalue()]),
+        media_type="text/csv",
+        headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'},
+    )


-def _build_company_pdf(rows, company_name: str) -> bytes:
-    """Render *rows* as PDF bytes using reportlab."""
+@app.get("/export/{company_name}/pdf", tags=["Export"])
+async def export_company_pdf(
+    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    _: UserResponse = Depends(get_current_user),
+):
+    """Export analysis results for a company as a formatted PDF report.
+
+    Returns all stored analysis records for the given company, including
+    analysis type, model used, response text, and timestamp, formatted
+    as a downloadable PDF document.
+
+    Args:
+        company_name: Company name to export results for
+
+    Returns:
+        PDF file download
+    """
    import io

    from reportlab.lib import colors
@@ -737,6 +770,23 @@ def _build_company_pdf(rows, company_name: str) -> bytes:
        TableStyle,
    )

+    db = get_db_client()
+    with db.get_conn() as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                """
+                SELECT company_name, analysis_type, model, response, timestamp
+                FROM llm_messages
+                WHERE LOWER(company_name) = LOWER(%s) AND is_cached = FALSE
+                ORDER BY timestamp DESC
+                """,
+                (company_name,),
+            )
+            rows = cur.fetchall()
+
+    if not rows:
+        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
+
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
@@ -779,11 +829,13 @@ def _build_company_pdf(rows, company_name: str) -> bytes:

    elements = []

-    display_name = rows[0][0]
+    # Title and date
+    display_name = rows[0][0]  # Use the casing from the database
    analysis_date = datetime.now().strftime("%Y-%m-%d")
    elements.append(Paragraph(f"SPARC Analysis Report: {display_name}", title_style))
    elements.append(Paragraph(f"Generated on {analysis_date}", subtitle_style))

+    # Summary table
    summary_data = [
        ["Total Analyses", str(len(rows))],
        ["Analysis Types", ", ".join(sorted(set(r[1] for r in rows)))],
@@ -805,6 +857,7 @@ def _build_company_pdf(rows, company_name: str) -> bytes:
    elements.append(summary_table)
    elements.append(Spacer(1, 16))

+    # Individual analysis sections
    for i, row in enumerate(rows, 1):
        _, analysis_type, model, response, timestamp = row
        ts_str = timestamp.strftime("%Y-%m-%d %H:%M:%S") if hasattr(timestamp, "strftime") else str(timestamp)
@@ -816,11 +869,13 @@ def _build_company_pdf(rows, company_name: str) -> bytes:
            Paragraph(f"<i>Performed: {ts_str}</i>", body_style)
        )

+        # Wrap long response text into paragraphs, escaping XML special chars
        safe_response = (
            response.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )
+        # Split into manageable paragraphs to avoid overflow
        for line in safe_response.split("\n"):
            if line.strip():
                elements.append(Paragraph(line, body_style))
@@ -831,133 +886,11 @@ def _build_company_pdf(rows, company_name: str) -> bytes:

    doc.build(elements)
    buffer.seek(0)
-    return buffer.getvalue()
-
-
-@app.post("/export/batch", tags=["Export"])
-async def export_batch_zip(
-    request: BatchExportRequest,
-    _: UserResponse = Depends(get_current_user),
-):
-    """Export analysis results for multiple companies as a ZIP archive.
-
-    For each company in the request, fetches all stored analysis records and
-    adds a per-company file (CSV or PDF) to the archive. Companies with no
-    stored results are skipped; a ``manifest.json`` inside the ZIP lists both
-    the exported and skipped companies.
-
-    Args:
-        request: List of company names and desired export format ('csv' or 'pdf')
-
-    Returns:
-        ZIP archive download containing one file per found company plus a manifest
-    """
-    import io
-    import json
-    import zipfile
-
-    db = get_db_client()
-    export_date = datetime.now().strftime("%Y-%m-%d")
-    fmt = request.format
-
-    exported: list[str] = []
-    skipped: list[str] = []
-
-    zip_buffer = io.BytesIO()
-    with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
-        for company_name in request.companies:
-            rows = _fetch_company_rows(db, company_name)
-            if not rows:
-                skipped.append(company_name)
-                continue
-
-            safe_name = company_name.replace(" ", "_").lower()
-            if fmt == "pdf":
-                file_bytes = _build_company_pdf(rows, company_name)
-                filename = f"{safe_name}-analysis-{export_date}.pdf"
-            else:
-                file_bytes = _build_company_csv(rows)
-                filename = f"sparc_{safe_name}_export.csv"
-
-            zf.writestr(filename, file_bytes)
-            exported.append(company_name)
-
-        # Always include a manifest
-        manifest = {
-            "export_date": export_date,
-            "format": fmt,
-            "exported": exported,
-            "skipped": skipped,
-        }
-        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
-
-    zip_buffer.seek(0)
-    zip_filename = f"sparc-export-{export_date}.zip"
-    return StreamingResponse(
-        iter([zip_buffer.getvalue()]),
-        media_type="application/zip",
-        headers={"Content-Disposition": f'attachment; filename="{zip_filename}"'},
-    )
-
-
-@app.get("/export/{company_name}", tags=["Export"])
-async def export_company_csv(
-    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
-    _: UserResponse = Depends(get_current_user),
-):
-    """Export analysis results for a company as a CSV file.
-
-    Returns all stored analysis records for the given company, including
-    analysis type, model used, response text, and timestamp.
-
-    Args:
-        company_name: Company name to export results for
-
-    Returns:
-        CSV file download
-    """
-    db = get_db_client()
-    rows = _fetch_company_rows(db, company_name)
-
-    if not rows:
-        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")

    safe_name = company_name.replace(" ", "_").lower()
-    return StreamingResponse(
-        iter([_build_company_csv(rows)]),
-        media_type="text/csv",
-        headers={"Content-Disposition": f'attachment; filename="sparc_{safe_name}_export.csv"'},
-    )
-
-
-@app.get("/export/{company_name}/pdf", tags=["Export"])
-async def export_company_pdf(
-    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
-    _: UserResponse = Depends(get_current_user),
-):
-    """Export analysis results for a company as a formatted PDF report.
-
-    Returns all stored analysis records for the given company, including
-    analysis type, model used, response text, and timestamp, formatted
-    as a downloadable PDF document.
-
-    Args:
-        company_name: Company name to export results for
-
-    Returns:
-        PDF file download
-    """
-    db = get_db_client()
-    rows = _fetch_company_rows(db, company_name)
-
-    if not rows:
-        raise HTTPException(status_code=404, detail=f"No analysis results found for '{company_name}'")
-
-    safe_name = company_name.replace(" ", "_").lower()
-    analysis_date = datetime.now().strftime("%Y-%m-%d")
    filename = f"{safe_name}-analysis-{analysis_date}.pdf"
    return StreamingResponse(
-        iter([_build_company_pdf(rows, company_name)]),
+        iter([buffer.getvalue()]),
        media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
@@ -982,7 +915,7 @@ async def health_check():
    tags=["Analysis"],
 )
 async def analyze_company(
-    company_name: Annotated[str, Path(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
+    company_name: Annotated[str, Path(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$")],
    model: str | None = Query(default=None, description="LLM model to use (e.g. 'openai/gpt-4o'). Defaults to server config."),
    _: UserResponse = Depends(get_current_user),
 ):
@@ -1012,7 +945,7 @@ async def analyze_company(
 )
 async def analyze_single_patent(
    patent_id: str,
-    company_name: Annotated[str, Query(min_length=2, max_length=100, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$", description="Company name for analysis context")],
+    company_name: Annotated[str, Query(min_length=2, max_length=128, pattern=r"^[a-zA-Z0-9][a-zA-Z0-9 \-&.]*$", description="Company name for analysis context")],
    _: UserResponse = Depends(get_current_user),
 ):
    """Analyze a single patent by its publication ID.
@@ -1046,7 +979,7 @@ async def analyze_single_patent(
 async def list_analysis_results(
    company_name: Annotated[
        str | None,
-        Query(description="Filter results by company name"),
+        _COMPANY_NAME_FILTER_QUERY,
    ] = None,
    limit: Annotated[int, Query(ge=1, le=200)] = 50,
    cursor: Annotated[
@@ -1,373 +0,0 @@
-"""Tests for POST /export/batch endpoint (issue #1674).
-
-Covers:
- Single company export (CSV + PDF)
- Multiple company export
- All-missing companies (every requested company is skipped)
- Unauthenticated / invalid-token requests
- Manifest content validation
- Invalid format rejection
-"""
-
-import io
-import json
-import zipfile
-from datetime import datetime, timezone
-from unittest.mock import MagicMock, patch
-
-import pytest
-from fastapi.testclient import TestClient
-
-from SPARC.api import app
-from SPARC.auth import create_access_token
-
-
-@pytest.fixture
-def client():
-    """Create a FastAPI test client."""
-    return TestClient(app)
-
-
-@pytest.fixture(autouse=True)
-def mock_db():
-    """Mock database client for all tests in this module."""
-    db = MagicMock()
-
-    # Auth: user always exists
-    db.get_user_by_id.return_value = {
-        "id": 1,
-        "email": "user@test.com",
-        "role": "user",
-        "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
-    }
-
-    # Default cursor mock (overridden per-test via side_effect or return_value)
-    mock_cursor = MagicMock()
-    mock_conn = MagicMock()
-    mock_conn.cursor.return_value.__enter__ = MagicMock(return_value=mock_cursor)
-    mock_conn.cursor.return_value.__exit__ = MagicMock(return_value=False)
-    db.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn)
-    db.get_conn.return_value.__exit__ = MagicMock(return_value=False)
-    db._mock_cursor = mock_cursor
-
-    with patch("SPARC.api.get_db_client", return_value=db), \
-         patch("SPARC.auth.get_db_client", return_value=db):
-        yield db
-
-
-def _auth_header():
-    token = create_access_token(1, "user@test.com", "user")
-    return {"Authorization": f"Bearer {token}"}
-
-
-def _rows_for(company_name: str):
-    """Return a single sample row for the given company."""
-    return [
-        (
-            company_name,
-            "company_analysis",
-            "anthropic/claude-3.5-sonnet",
-            f"Strong patent portfolio for {company_name}.",
-            datetime(2025, 6, 15, 10, 30, 0),
-        )
-    ]
-
-
-def _open_zip(content: bytes) -> zipfile.ZipFile:
-    """Helper: wrap response bytes as a ZipFile."""
-    return zipfile.ZipFile(io.BytesIO(content))
-
-
-# ---------------------------------------------------------------------------
-# Authentication
-# ---------------------------------------------------------------------------
-
-
-class TestBatchExportAuth:
-    """Unauthenticated and invalid-token requests must be rejected."""
-
-    def test_unauthenticated_returns_401(self, client):
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-        )
-        assert response.status_code == 401
-
-    def test_invalid_token_returns_401(self, client):
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-            headers={"Authorization": "Bearer totally.invalid.token"},
-        )
-        assert response.status_code == 401
-
-
-# ---------------------------------------------------------------------------
-# Single company
-# ---------------------------------------------------------------------------
-
-
-class TestBatchExportSingleCompany:
-    """POST /export/batch with a single company name."""
-
-    def test_single_company_csv_returns_zip(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        assert response.status_code == 200
-        assert response.headers["content-type"] == "application/zip"
-        assert "attachment" in response.headers["content-disposition"]
-        assert "sparc-export-" in response.headers["content-disposition"]
-        assert response.headers["content-disposition"].endswith('.zip"')
-
-    def test_single_company_csv_zip_contains_csv_file(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        names = zf.namelist()
-        csv_files = [n for n in names if n.endswith(".csv")]
-        assert len(csv_files) == 1
-        assert "nvidia" in csv_files[0]
-
-    def test_single_company_csv_content_is_valid_csv(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        csv_name = [n for n in zf.namelist() if n.endswith(".csv")][0]
-        csv_text = zf.read(csv_name).decode("utf-8")
-        lines = csv_text.strip().split("\n")
-        assert lines[0].strip() == "company_name,analysis_type,model,analysis,timestamp"
-        assert "NVIDIA" in lines[1]
-
-    def test_single_company_pdf_zip_contains_pdf_file(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "pdf"},
-            headers=_auth_header(),
-        )
-
-        assert response.status_code == 200
-        zf = _open_zip(response.content)
-        pdf_files = [n for n in zf.namelist() if n.endswith(".pdf")]
-        assert len(pdf_files) == 1
-        # Verify it is actually a PDF (starts with %PDF)
-        pdf_bytes = zf.read(pdf_files[0])
-        assert pdf_bytes[:4] == b"%PDF"
-
-
-# ---------------------------------------------------------------------------
-# Multiple companies
-# ---------------------------------------------------------------------------
-
-
-class TestBatchExportMultipleCompanies:
-    """POST /export/batch with several companies."""
-
-    def test_multiple_companies_each_gets_a_file(self, client, mock_db):
-        companies = ["NVIDIA", "Intel", "AMD"]
-        mock_db._mock_cursor.fetchall.side_effect = [
-            _rows_for("NVIDIA"),
-            _rows_for("Intel"),
-            _rows_for("AMD"),
-        ]
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": companies, "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        assert response.status_code == 200
-        zf = _open_zip(response.content)
-        csv_files = [n for n in zf.namelist() if n.endswith(".csv")]
-        assert len(csv_files) == 3
-
-    def test_multiple_companies_manifest_lists_all_exported(self, client, mock_db):
-        companies = ["NVIDIA", "Intel"]
-        mock_db._mock_cursor.fetchall.side_effect = [
-            _rows_for("NVIDIA"),
-            _rows_for("Intel"),
-        ]
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": companies, "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        manifest = json.loads(zf.read("manifest.json"))
-        assert set(manifest["exported"]) == {"NVIDIA", "Intel"}
-        assert manifest["skipped"] == []
-        assert manifest["format"] == "csv"
-
-    def test_partial_missing_companies_skipped(self, client, mock_db):
-        """Companies with no data are skipped; others are exported."""
-        mock_db._mock_cursor.fetchall.side_effect = [
-            _rows_for("NVIDIA"),
-            [],  # no data for "UnknownCo"
-        ]
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA", "UnknownCo"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        assert response.status_code == 200
-        zf = _open_zip(response.content)
-        manifest = json.loads(zf.read("manifest.json"))
-        assert manifest["exported"] == ["NVIDIA"]
-        assert manifest["skipped"] == ["UnknownCo"]
-
-        csv_files = [n for n in zf.namelist() if n.endswith(".csv")]
-        assert len(csv_files) == 1
-
-
-# ---------------------------------------------------------------------------
-# All-missing companies
-# ---------------------------------------------------------------------------
-
-
-class TestBatchExportAllMissing:
-    """When every requested company has no data, the ZIP still returns 200
-    with only a manifest (no per-company files, all listed in skipped)."""
-
-    def test_all_missing_returns_200_with_manifest_only(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = []
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["GhostCo", "PhantomInc"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        assert response.status_code == 200
-        zf = _open_zip(response.content)
-        assert "manifest.json" in zf.namelist()
-
-        manifest = json.loads(zf.read("manifest.json"))
-        assert manifest["exported"] == []
-        assert set(manifest["skipped"]) == {"GhostCo", "PhantomInc"}
-
-    def test_all_missing_zip_has_no_data_files(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = []
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["GhostCo"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        data_files = [n for n in zf.namelist() if n != "manifest.json"]
-        assert data_files == []
-
-
-# ---------------------------------------------------------------------------
-# Manifest validation
-# ---------------------------------------------------------------------------
-
-
-class TestBatchExportManifest:
-    """The manifest.json inside every ZIP must be well-formed."""
-
-    def test_manifest_always_present(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        assert "manifest.json" in zf.namelist()
-
-    def test_manifest_contains_required_keys(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "csv"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        manifest = json.loads(zf.read("manifest.json"))
-        assert "export_date" in manifest
-        assert "format" in manifest
-        assert "exported" in manifest
-        assert "skipped" in manifest
-
-    def test_manifest_format_field_matches_request(self, client, mock_db):
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "pdf"},
-            headers=_auth_header(),
-        )
-
-        zf = _open_zip(response.content)
-        manifest = json.loads(zf.read("manifest.json"))
-        assert manifest["format"] == "pdf"
-
-
-# ---------------------------------------------------------------------------
-# Input validation
-# ---------------------------------------------------------------------------
-
-
-class TestBatchExportInputValidation:
-    """Invalid request bodies must return 422."""
-
-    def test_invalid_format_returns_422(self, client):
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"], "format": "xlsx"},
-            headers=_auth_header(),
-        )
-        assert response.status_code == 422
-
-    def test_empty_companies_list_returns_422(self, client):
-        response = client.post(
-            "/export/batch",
-            json={"companies": [], "format": "csv"},
-            headers=_auth_header(),
-        )
-        assert response.status_code == 422
-
-    def test_default_format_is_csv(self, client, mock_db):
-        """Omitting `format` should default to CSV."""
-        mock_db._mock_cursor.fetchall.return_value = _rows_for("NVIDIA")
-
-        response = client.post(
-            "/export/batch",
-            json={"companies": ["NVIDIA"]},
-            headers=_auth_header(),
-        )
-
-        assert response.status_code == 200
-        zf = _open_zip(response.content)
-        manifest = json.loads(zf.read("manifest.json"))
-        assert manifest["format"] == "csv"
@@ -43,12 +43,18 @@ class TestCompanyNameValidation:

    # --- Too long ---

-    def test_over_100_chars_rejected(self, client, mock_analyzer):
-        """A company name longer than 100 characters should be rejected."""
-        long_name = "A" * 101
+    def test_over_128_chars_rejected(self, client, mock_analyzer):
+        """A company name longer than 128 characters should be rejected."""
+        long_name = "A" * 129
        response = client.get(f"/analyze/{long_name}")
        assert response.status_code == 422

+    def test_exactly_128_chars_accepted(self, client, mock_analyzer):
+        """A company name of exactly 128 characters should be accepted."""
+        max_name = "A" * 128
+        response = client.get(f"/analyze/{max_name}")
+        assert response.status_code != 422
+
    # --- Special characters ---

    @pytest.mark.parametrize(
@@ -95,7 +101,7 @@ class TestCompanyNameValidation:
            "3M",
            "21st Century Fox",
            "ab",  # minimum length
-            "A" * 100,  # maximum length
+            "A" * 128,  # maximum length
        ],
    )
    def test_valid_names_accepted(self, client, mock_analyzer, valid_name):
@@ -118,7 +124,7 @@ class TestCompanyNameValidation:
        """Batch endpoint should reject company names that are too long."""
        response = client.post(
            "/analyze/batch",
-            json={"companies": ["A" * 101]},
+            json={"companies": ["A" * 129]},
        )
        assert response.status_code == 422

@@ -155,3 +161,30 @@ class TestCompanyNameValidation:
            json={"companies": ["-nvidia"]},
        )
        assert response.status_code == 422
+
+    # --- GET /analyze/batch company_name filter validation ---
+
+    def test_batch_filter_special_chars_rejected(self, client, mock_analyzer):
+        """GET /analyze/batch company_name filter rejects disallowed chars."""
+        response = client.get("/analyze/batch", params={"company_name": "nvidia!"})
+        assert response.status_code == 422
+
+    def test_batch_filter_too_short_rejected(self, client, mock_analyzer):
+        """GET /analyze/batch company_name filter rejects names under 2 chars."""
+        response = client.get("/analyze/batch", params={"company_name": "X"})
+        assert response.status_code == 422
+
+    def test_batch_filter_too_long_rejected(self, client, mock_analyzer):
+        """GET /analyze/batch company_name filter rejects names over 128 chars."""
+        response = client.get("/analyze/batch", params={"company_name": "A" * 129})
+        assert response.status_code == 422
+
+    def test_batch_filter_valid_name_accepted(self, client, mock_analyzer):
+        """GET /analyze/batch company_name filter accepts a valid name."""
+        response = client.get("/analyze/batch", params={"company_name": "nvidia"})
+        assert response.status_code != 422
+
+    def test_batch_filter_omitted_accepted(self, client, mock_analyzer):
+        """GET /analyze/batch without company_name filter should work fine."""
+        response = client.get("/analyze/batch")
+        assert response.status_code != 422