From 6533cef56f327689f44aa9855a7cbead34f5bc13 Mon Sep 17 00:00:00 2001
From: 0xWheatyz <wyatt@leeworks.dev>
Date: Thu, 19 Feb 2026 18:52:54 -0500
Subject: [PATCH] test: add pytest framework and initial test suite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added pytest and pytest-mock to requirements.txt for testing infrastructure.

Created tests/ directory with comprehensive test coverage for:
- Text cleaning functions (figure references, whitespace, line numbers)
- Section extraction logic (abstract, claims, case sensitivity)

All 8 tests passing. This provides a foundation for test-driven
development as we continue building the LLM integration.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 requirements.txt       |   2 +
 tests/__init__.py      |   1 +
 tests/test_serp_api.py | 104 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_serp_api.py

diff --git a/requirements.txt b/requirements.txt
index 2b024b5..8e31464 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,5 @@ python-dotenv
 serpapi
 pdfplumber
 requests
+pytest
+pytest-mock
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..db58cc1
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for SPARC patent analysis system."""
diff --git a/tests/test_serp_api.py b/tests/test_serp_api.py
new file mode 100644
index 0000000..42ee7c2
--- /dev/null
+++ b/tests/test_serp_api.py
@@ -0,0 +1,104 @@
+"""Tests for SERP API patent retrieval and parsing functionality."""
+
+import pytest
+from SPARC.serp_api import SERP
+
+
+class TestTextCleaning:
+    """Tests for ``SERP.clean_patent_text``."""
+
+    def test_clean_patent_text_removes_figure_references(self):
+        """Parenthetical figure references go; the spaces around them stay."""
+        text = "This is a description (see FIG. 1) of the invention."
+        cleaned = SERP.clean_patent_text(text)
+        assert "(see FIG. 1)" not in cleaned
+        assert "This is a description  of the invention." in cleaned
+
+    def test_clean_patent_text_removes_fig_labels(self):
+        """A bare ``FIG.`` label inside a sentence should be removed."""
+        text = "As shown in FIG. 2A the circuit operates."
+        cleaned = SERP.clean_patent_text(text)
+        assert "FIG. 2A" not in cleaned
+
+    def test_clean_patent_text_removes_excessive_whitespace(self):
+        """A run of blank lines should collapse to a single blank line."""
+        text = "Line 1\n\n\n\n\nLine 2"
+        cleaned = SERP.clean_patent_text(text)
+        assert "\n\n\n\n\n" not in cleaned
+        assert "Line 1\n\nLine 2" in cleaned
+
+    def test_clean_patent_text_removes_line_numbers(self):
+        """A number standing alone on its own line should be removed."""
+        text = "Some text\n42\nMore text"
+        cleaned = SERP.clean_patent_text(text)
+        # Check the bare "42" line is gone, not merely that the text changed.
+        assert "42" not in [line.strip() for line in cleaned.splitlines()]
+
+
+class TestSectionExtraction:
+    """Tests for ``SERP.extract_section``."""
+
+    def test_extract_section_finds_abstract(self):
+        """The abstract body is returned and the end heading is left out."""
+        document = """
+        PATENT DOCUMENT
+
+        ABSTRACT
+        This is the abstract text describing the invention.
+
+        BACKGROUND
+        This is background information.
+        """
+        start, end = [r"ABSTRACT"], [r"BACKGROUND"]
+        section = SERP.extract_section(
+            document, start_patterns=start, end_patterns=end
+        )
+        # Text after the start heading is kept; the end heading is not.
+        assert "This is the abstract text" in section
+        assert "BACKGROUND" not in section
+
+    def test_extract_section_finds_claims(self):
+        """Both numbered claims are returned and the end heading is left out."""
+        document = """
+        SUMMARY
+        Summary text here.
+
+        What is claimed is:
+        1. A method comprising steps A and B.
+        2. The method of claim 1, further comprising step C.
+
+        ABSTRACT
+        Abstract text.
+        """
+        start, end = [r"What is claimed is:"], [r"ABSTRACT"]
+        section = SERP.extract_section(
+            document, start_patterns=start, end_patterns=end
+        )
+        # Every claim must survive extraction; the end heading must not.
+        for expected in ("1. A method comprising", "2. The method of claim 1"):
+            assert expected in section
+        assert "ABSTRACT" not in section
+
+    def test_extract_section_returns_empty_when_not_found(self):
+        """An empty string comes back when the text has none of the patterns."""
+        plain = "This text has no matching patterns."
+        start, end = [r"ABSTRACT"], [r"BACKGROUND"]
+        section = SERP.extract_section(plain, start_patterns=start, end_patterns=end)
+        # The contract under test is an empty string, not None.
+        assert section == ""
+
+    def test_extract_section_handles_case_insensitive(self):
+        """Upper-case patterns should still match lower-case headings."""
+        document = """
+        abstract
+        This is the abstract in lowercase.
+
+        background
+        Background text.
+        """
+        # Patterns are upper-case on purpose; the headings above are not.
+        start, end = [r"ABSTRACT"], [r"BACKGROUND"]
+        section = SERP.extract_section(
+            document, start_patterns=start, end_patterns=end
+        )
+        assert "This is the abstract in lowercase" in section