test: add pytest framework and initial test suite
Added pytest and pytest-mock to requirements.txt for testing infrastructure. Created tests/ directory with comprehensive test coverage for:

- Text cleaning functions (figure references, whitespace, line numbers)
- Section extraction logic (abstract, claims, case sensitivity)

All 8 tests passing. This provides a foundation for test-driven development as we continue building the LLM integration.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
58f2bdc238
commit
6533cef56f
@ -2,3 +2,5 @@ python-dotenv
|
||||
serpapi
|
||||
pdfplumber
|
||||
requests
|
||||
pytest
|
||||
pytest-mock
|
||||
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Tests for SPARC patent analysis system."""
|
||||
104
tests/test_serp_api.py
Normal file
104
tests/test_serp_api.py
Normal file
@ -0,0 +1,104 @@
|
||||
"""Tests for SERP API patent retrieval and parsing functionality."""
|
||||
|
||||
import pytest
|
||||
from SPARC.serp_api import SERP
|
||||
|
||||
|
||||
class TestTextCleaning:
    """Exercise ``SERP.clean_patent_text`` on small hand-written inputs.

    Each test feeds the cleaner one kind of noise (a parenthesised figure
    reference, a bare figure label, a run of blank lines, a stray line
    number) and checks the text that comes back.
    """

    def test_clean_patent_text_removes_figure_references(self):
        """Test that a parenthesised figure reference is removed from text."""
        text = "This is a description (see FIG. 1) of the invention."
        cleaned = SERP.clean_patent_text(text)
        assert "(see FIG. 1)" not in cleaned
        # The sentence around the reference must survive, single-spaced.
        assert "This is a description of the invention." in cleaned

    def test_clean_patent_text_removes_fig_labels(self):
        """Test that bare FIG labels are removed from text."""
        text = "As shown in FIG. 2A the circuit operates."
        cleaned = SERP.clean_patent_text(text)
        assert "FIG. 2A" not in cleaned

    def test_clean_patent_text_removes_excessive_whitespace(self):
        """Test that a run of blank lines is collapsed to a single blank line."""
        text = "Line 1\n\n\n\n\nLine 2"
        cleaned = SERP.clean_patent_text(text)
        assert "\n\n\n\n\n" not in cleaned
        assert "Line 1\n\nLine 2" in cleaned

    def test_clean_patent_text_removes_line_numbers(self):
        """Test that a number alone on its own line is removed from text."""
        text = "Some text\n42\nMore text"
        cleaned = SERP.clean_patent_text(text)
        # Comparing the output against the input alone would pass for *any*
        # change the cleaner makes, so check directly that no line made up
        # solely of digits is left behind.
        digit_only_lines = [
            line for line in cleaned.splitlines() if line.strip().isdigit()
        ]
        assert digit_only_lines == []
        # Removing the number must not take the neighbouring prose with it.
        assert "Some text" in cleaned
        assert "More text" in cleaned
|
||||
|
||||
|
||||
class TestSectionExtraction:
    """Check ``SERP.extract_section`` against short synthetic patent texts.

    Every test passes a document plus lists of start and end patterns and
    inspects the string that is returned.
    """

    def test_extract_section_finds_abstract(self):
        """The abstract body is returned and the following heading is not."""
        document = """
        PATENT DOCUMENT

        ABSTRACT
        This is the abstract text describing the invention.

        BACKGROUND
        This is background information.
        """
        opening = [r"ABSTRACT"]
        closing = [r"BACKGROUND"]

        section = SERP.extract_section(
            document, start_patterns=opening, end_patterns=closing
        )

        assert "This is the abstract text" in section
        assert "BACKGROUND" not in section

    def test_extract_section_finds_claims(self):
        """Both numbered claims are returned; the trailing abstract is not."""
        document = """
        SUMMARY
        Summary text here.

        What is claimed is:
        1. A method comprising steps A and B.
        2. The method of claim 1, further comprising step C.

        ABSTRACT
        Abstract text.
        """
        opening = [r"What is claimed is:"]
        closing = [r"ABSTRACT"]

        section = SERP.extract_section(
            document, start_patterns=opening, end_patterns=closing
        )

        assert "1. A method comprising" in section
        assert "2. The method of claim 1" in section
        assert "ABSTRACT" not in section

    def test_extract_section_returns_empty_when_not_found(self):
        """With no start pattern present in the text, the result is ``""``."""
        document = "This text has no matching patterns."

        section = SERP.extract_section(
            document,
            start_patterns=[r"ABSTRACT"],
            end_patterns=[r"BACKGROUND"],
        )

        assert section == ""

    def test_extract_section_handles_case_insensitive(self):
        """Upper-case patterns still locate lower-case section headings."""
        document = """
        abstract
        This is the abstract in lowercase.

        background
        Background text.
        """
        opening = [r"ABSTRACT"]
        closing = [r"BACKGROUND"]

        section = SERP.extract_section(
            document, start_patterns=opening, end_patterns=closing
        )

        assert "This is the abstract in lowercase" in section
|
||||
Loading…
Reference in New Issue
Block a user