From 6533cef56f327689f44aa9855a7cbead34f5bc13 Mon Sep 17 00:00:00 2001 From: 0xWheatyz Date: Thu, 19 Feb 2026 18:52:54 -0500 Subject: [PATCH] test: add pytest framework and initial test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added pytest and pytest-mock to requirements.txt for testing infrastructure. Created tests/ directory with comprehensive test coverage for: - Text cleaning functions (figure references, whitespace, line numbers) - Section extraction logic (abstract, claims, case sensitivity) All 8 tests passing. This provides a foundation for test-driven development as we continue building the LLM integration. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- requirements.txt | 2 + tests/__init__.py | 1 + tests/test_serp_api.py | 104 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_serp_api.py diff --git a/requirements.txt b/requirements.txt index 2b024b5..8e31464 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,5 @@ python-dotenv serpapi pdfplumber requests +pytest +pytest-mock diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..db58cc1 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for SPARC patent analysis system.""" diff --git a/tests/test_serp_api.py b/tests/test_serp_api.py new file mode 100644 index 0000000..42ee7c2 --- /dev/null +++ b/tests/test_serp_api.py @@ -0,0 +1,104 @@ +"""Tests for SERP API patent retrieval and parsing functionality.""" + +import pytest +from SPARC.serp_api import SERP + + +class TestTextCleaning: + """Test patent text cleaning functionality.""" + + def test_clean_patent_text_removes_figure_references(self): + """Test that figure references are removed from text.""" + text = "This is a description (see FIG. 1) of the invention." + cleaned = SERP.clean_patent_text(text) + assert "(see FIG. 1)" not in cleaned + assert "This is a description of the invention." in cleaned + + def test_clean_patent_text_removes_fig_labels(self): + """Test that FIG labels are removed from text.""" + text = "As shown in FIG. 2A the circuit operates." + cleaned = SERP.clean_patent_text(text) + assert "FIG. 2A" not in cleaned + + def test_clean_patent_text_removes_excessive_whitespace(self): + """Test that excessive whitespace is normalized.""" + text = "Line 1\n\n\n\n\nLine 2" + cleaned = SERP.clean_patent_text(text) + assert "\n\n\n\n\n" not in cleaned + assert "Line 1\n\nLine 2" in cleaned + + def test_clean_patent_text_removes_line_numbers(self): + """Test that line numbers are removed from text.""" + text = "Some text\n42\nMore text" + cleaned = SERP.clean_patent_text(text) + # Line numbers on their own line should be removed + assert cleaned.strip() != "Some text\n42\nMore text" + + +class TestSectionExtraction: + """Test patent section extraction functionality.""" + + def test_extract_section_finds_abstract(self): + """Test extraction of abstract section.""" + text = """ + PATENT DOCUMENT + + ABSTRACT + This is the abstract text describing the invention. + + BACKGROUND + This is background information. + """ + result = SERP.extract_section( + text, + start_patterns=[r"ABSTRACT"], + end_patterns=[r"BACKGROUND"], + ) + assert "This is the abstract text" in result + assert "BACKGROUND" not in result + + def test_extract_section_finds_claims(self): + """Test extraction of claims section.""" + text = """ + SUMMARY + Summary text here. + + What is claimed is: + 1. A method comprising steps A and B. + 2. The method of claim 1, further comprising step C. + + ABSTRACT + Abstract text. + """ + result = SERP.extract_section( + text, + start_patterns=[r"What is claimed is:"], + end_patterns=[r"ABSTRACT"], + ) + assert "1. A method comprising" in result + assert "2. The method of claim 1" in result + assert "ABSTRACT" not in result + + def test_extract_section_returns_empty_when_not_found(self): + """Test that empty string is returned when section not found.""" + text = "This text has no matching patterns." + result = SERP.extract_section( + text, start_patterns=[r"ABSTRACT"], end_patterns=[r"BACKGROUND"] + ) + assert result == "" + + def test_extract_section_handles_case_insensitive(self): + """Test that section extraction is case insensitive.""" + text = """ + abstract + This is the abstract in lowercase. + + background + Background text. + """ + result = SERP.extract_section( + text, + start_patterns=[r"ABSTRACT"], + end_patterns=[r"BACKGROUND"], + ) + assert "This is the abstract in lowercase" in result