forked from 0xWheatyz/SPARC
feat: add patent content minimization for LLM consumption
Implemented the minimize_patent_for_llm() function, which reduces patent content by keeping only the essential sections (abstract, claims, summary) and explicitly excluding the verbose detailed description section. This reduces token usage while preserving the core innovation details needed for company performance estimation.

Added comprehensive test coverage (5 new tests) for:
- Essential section inclusion
- Description section exclusion
- Missing section handling
- Empty section handling
- Section separator formatting

All 13 tests passing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -102,3 +102,77 @@ class TestSectionExtraction:
|
||||
end_patterns=[r"BACKGROUND"],
|
||||
)
|
||||
assert "This is the abstract in lowercase" in result
|
||||
|
||||
|
||||
class TestPatentMinimization:
    """Test patent content minimization for LLM consumption.

    Every test passes a dict of section name -> section text to
    ``SERP.minimize_patent_for_llm`` and inspects the returned string.
    """

    def test_minimize_includes_all_essential_sections(self):
        """Test that all essential sections are included in minimized output."""
        sections = {
            "abstract": "This is the abstract.",
            "claims": "1. A method for doing X.",
            "summary": "This invention relates to X.",
            "description": "Very long detailed description...",
        }
        result = SERP.minimize_patent_for_llm(sections)

        # Each essential section must contribute both its header and its text.
        assert "ABSTRACT:" in result
        assert "This is the abstract." in result
        assert "CLAIMS:" in result
        assert "1. A method for doing X." in result
        assert "SUMMARY:" in result
        assert "This invention relates to X." in result

    def test_minimize_excludes_description(self):
        """Test that detailed description is excluded from minimized output."""
        sections = {
            "abstract": "This is the abstract.",
            "claims": "1. A method for doing X.",
            "summary": "This invention relates to X.",
            "description": "Very long detailed description that should be excluded.",
        }
        result = SERP.minimize_patent_for_llm(sections)

        # Neither the description body nor a header for it may leak through.
        assert "Very long detailed description" not in result
        assert "DESCRIPTION:" not in result

    def test_minimize_handles_missing_sections(self):
        """Test that minimization handles missing sections gracefully."""
        sections = {
            "abstract": "This is the abstract.",
            # claims missing
            # summary missing
            "description": "Description text.",
        }
        result = SERP.minimize_patent_for_llm(sections)

        assert "ABSTRACT:" in result
        assert "This is the abstract." in result
        # Should not error on missing sections
        assert isinstance(result, str)

    def test_minimize_with_empty_sections(self):
        """Test that empty sections are handled properly."""
        sections = {
            "abstract": "",
            "claims": "1. A method.",
            "summary": "",
        }
        result = SERP.minimize_patent_for_llm(sections)

        # Empty sections should not appear: verify their headers are absent,
        # not just that the populated section is present.
        assert "ABSTRACT:" not in result
        assert "SUMMARY:" not in result
        assert result.count("CLAIMS:") == 1
        assert "1. A method." in result

    def test_minimize_separates_sections_with_double_newline(self):
        """Test that sections are properly separated."""
        sections = {
            "abstract": "Abstract text.",
            "claims": "Claims text.",
            "summary": "Summary text.",
        }
        result = SERP.minimize_patent_for_llm(sections)

        # Sections should be separated by double newlines. Three populated
        # sections need at least two separators, so a single "\n\n" is not
        # sufficient evidence.
        assert "\n\n" in result
        assert result.count("\n\n") >= 2
|
||||
|
||||
Reference in New Issue
Block a user