From 4e419166e81257eee50506dc6d52bc846a4e84dc Mon Sep 17 00:00:00 2001 From: 0xWheatyz Date: Fri, 13 Mar 2026 15:37:24 -0400 Subject: [PATCH] fix: skip patents without PDF links in SERP query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all Google Patents results include PDF download links. Previously this caused a KeyError when accessing patent["pdf"]. Now patents without PDF links are gracefully skipped with documentation explaining when this occurs (recent filings, international patents, restricted access). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- SPARC/serp_api.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/SPARC/serp_api.py b/SPARC/serp_api.py index 05120e4..5959c08 100644 --- a/SPARC/serp_api.py +++ b/SPARC/serp_api.py @@ -8,6 +8,21 @@ from SPARC.types import Patents, Patent class SERP: def query(company: str) -> Patents: + """Query Google Patents for a company's recent patents. + + Args: + company: Name of the company to search for + + Returns: + Patents object containing list of patents with PDF links + + Note: + Patents without PDF download links are skipped. This occurs when + Google Patents doesn't have a PDF available for a particular patent + (e.g., recently filed patents, certain international patents, or + patents with restricted access). The returned count may be lower + than the requested number of results. + """ # Make API call params = { "engine": "google_patents", @@ -18,11 +33,14 @@ class SERP: "api_key": config.api_key, } search = serpapi.search(params) - # Convert data into a list of publicationID + # Convert results to Patent objects, skipping any without PDF links patent_ids = [] list_of_patents = search["organic_results"] for patent in list_of_patents: - patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=patent["pdf"], summary=None)) + pdf_link = patent.get("pdf") + if pdf_link: + patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None)) + # Patents without PDF links are skipped (see docstring for details) return Patents(patents=patent_ids)