test: add pytest framework and initial test suite
Added pytest and pytest-mock to requirements.txt for testing infrastructure.
Created tests/ directory with comprehensive test coverage for:
- Text cleaning functions (figure references, whitespace, line numbers)
- Section extraction logic (abstract, claims, case sensitivity)
All 8 tests are passing. This provides a foundation for test-driven development as we continue building the LLM integration.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
58f2bdc238
commit
6533cef56f
@ -2,3 +2,5 @@ python-dotenv
|
|||||||
serpapi
|
serpapi
|
||||||
pdfplumber
|
pdfplumber
|
||||||
requests
|
requests
|
||||||
|
pytest
|
||||||
|
pytest-mock
|
||||||
|
|||||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
"""Tests for SPARC patent analysis system."""
|
||||||
104
tests/test_serp_api.py
Normal file
104
tests/test_serp_api.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
"""Tests for SERP API patent retrieval and parsing functionality."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from SPARC.serp_api import SERP
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextCleaning:
    """Exercise ``SERP.clean_patent_text`` on small hand-written snippets."""

    def test_clean_patent_text_removes_figure_references(self):
        """A parenthesised ``(see FIG. n)`` reference is stripped from the text."""
        text = "This is a description (see FIG. 1) of the invention."
        cleaned = SERP.clean_patent_text(text)
        assert "(see FIG. 1)" not in cleaned
        # The sentence must still read cleanly: no doubled space may be left
        # behind where the reference used to be.
        assert "This is a description of the invention." in cleaned

    def test_clean_patent_text_removes_fig_labels(self):
        """A bare ``FIG. 2A`` label inside a sentence is stripped."""
        text = "As shown in FIG. 2A the circuit operates."
        cleaned = SERP.clean_patent_text(text)
        assert "FIG. 2A" not in cleaned

    def test_clean_patent_text_removes_excessive_whitespace(self):
        """A run of five newlines collapses to a single blank line."""
        text = "Line 1\n\n\n\n\nLine 2"
        cleaned = SERP.clean_patent_text(text)
        assert "\n\n\n\n\n" not in cleaned
        assert "Line 1\n\nLine 2" in cleaned

    def test_clean_patent_text_removes_line_numbers(self):
        """A number standing alone on its own line is removed."""
        text = "Some text\n42\nMore text"
        cleaned = SERP.clean_patent_text(text)
        # Check for the stray number itself. Comparing the output against the
        # input would pass on *any* modification, even one that kept "42".
        assert "42" not in cleaned
        # Removing the number must not take the neighbouring lines with it.
        assert "Some text" in cleaned
        assert "More text" in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
class TestSectionExtraction:
    """Exercise ``SERP.extract_section`` with start/end heading patterns."""

    def test_extract_section_finds_abstract(self):
        """The text between ABSTRACT and BACKGROUND is returned, heading excluded."""
        document = """
        PATENT DOCUMENT

        ABSTRACT
        This is the abstract text describing the invention.

        BACKGROUND
        This is background information.
        """
        start = [r"ABSTRACT"]
        end = [r"BACKGROUND"]
        section = SERP.extract_section(
            document, start_patterns=start, end_patterns=end
        )
        assert "This is the abstract text" in section
        assert "BACKGROUND" not in section

    def test_extract_section_finds_claims(self):
        """Both numbered claims are captured and the trailing ABSTRACT is not."""
        document = """
        SUMMARY
        Summary text here.

        What is claimed is:
        1. A method comprising steps A and B.
        2. The method of claim 1, further comprising step C.

        ABSTRACT
        Abstract text.
        """
        start = [r"What is claimed is:"]
        end = [r"ABSTRACT"]
        section = SERP.extract_section(
            document, start_patterns=start, end_patterns=end
        )
        assert "1. A method comprising" in section
        assert "2. The method of claim 1" in section
        assert "ABSTRACT" not in section

    def test_extract_section_returns_empty_when_not_found(self):
        """With no start pattern present, the result is the empty string."""
        document = "This text has no matching patterns."
        section = SERP.extract_section(
            document,
            start_patterns=[r"ABSTRACT"],
            end_patterns=[r"BACKGROUND"],
        )
        assert section == ""

    def test_extract_section_handles_case_insensitive(self):
        """Upper-case patterns still locate lower-case headings in the text."""
        document = """
        abstract
        This is the abstract in lowercase.

        background
        Background text.
        """
        start = [r"ABSTRACT"]
        end = [r"BACKGROUND"]
        section = SERP.extract_section(
            document, start_patterns=start, end_patterns=end
        )
        assert "This is the abstract in lowercase" in section
|
||||||
Loading…
Reference in New Issue
Block a user