"""Tests for SERP API patent retrieval and parsing functionality.""" import pytest from SPARC.serp_api import SERP class TestTextCleaning: """Test patent text cleaning functionality.""" def test_clean_patent_text_removes_figure_references(self): """Test that figure references are removed from text.""" text = "This is a description (see FIG. 1) of the invention." cleaned = SERP.clean_patent_text(text) assert "(see FIG. 1)" not in cleaned assert "This is a description of the invention." in cleaned def test_clean_patent_text_removes_fig_labels(self): """Test that FIG labels are removed from text.""" text = "As shown in FIG. 2A the circuit operates." cleaned = SERP.clean_patent_text(text) assert "FIG. 2A" not in cleaned def test_clean_patent_text_removes_excessive_whitespace(self): """Test that excessive whitespace is normalized.""" text = "Line 1\n\n\n\n\nLine 2" cleaned = SERP.clean_patent_text(text) assert "\n\n\n\n\n" not in cleaned assert "Line 1\n\nLine 2" in cleaned def test_clean_patent_text_removes_line_numbers(self): """Test that line numbers are removed from text.""" text = "Some text\n42\nMore text" cleaned = SERP.clean_patent_text(text) # Line numbers on their own line should be removed assert cleaned.strip() != "Some text\n42\nMore text" class TestSectionExtraction: """Test patent section extraction functionality.""" def test_extract_section_finds_abstract(self): """Test extraction of abstract section.""" text = """ PATENT DOCUMENT ABSTRACT This is the abstract text describing the invention. BACKGROUND This is background information. """ result = SERP.extract_section( text, start_patterns=[r"ABSTRACT"], end_patterns=[r"BACKGROUND"], ) assert "This is the abstract text" in result assert "BACKGROUND" not in result def test_extract_section_finds_claims(self): """Test extraction of claims section.""" text = """ SUMMARY Summary text here. What is claimed is: 1. A method comprising steps A and B. 2. The method of claim 1, further comprising step C. ABSTRACT Abstract text. """ result = SERP.extract_section( text, start_patterns=[r"What is claimed is:"], end_patterns=[r"ABSTRACT"], ) assert "1. A method comprising" in result assert "2. The method of claim 1" in result assert "ABSTRACT" not in result def test_extract_section_returns_empty_when_not_found(self): """Test that empty string is returned when section not found.""" text = "This text has no matching patterns." result = SERP.extract_section( text, start_patterns=[r"ABSTRACT"], end_patterns=[r"BACKGROUND"] ) assert result == "" def test_extract_section_handles_case_insensitive(self): """Test that section extraction is case insensitive.""" text = """ abstract This is the abstract in lowercase. background Background text. """ result = SERP.extract_section( text, start_patterns=[r"ABSTRACT"], end_patterns=[r"BACKGROUND"], ) assert "This is the abstract in lowercase" in result