fix: skip patents without PDF links in SERP query

Not all Google Patents results include a PDF download link. Previously, a result without one raised a KeyError when accessing patent["pdf"]. Patents without PDF links are now skipped, and the docstring documents when this occurs (recent filings, international patents, restricted access).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-13 15:37:24 -04:00
parent 7eb72ab549
commit 4e419166e8
1 changed file with 20 additions and 2 deletions
@@ -8,6 +8,21 @@ from SPARC.types import Patents, Patent

 class SERP:
  def query(company: str) -> Patents:
+    """Query Google Patents for a company's recent patents.
+
+    Args:
+        company: Name of the company to search for
+
+    Returns:
+        Patents object containing list of patents with PDF links
+
+    Note:
+        Patents without PDF download links are skipped. This occurs when
+        Google Patents doesn't have a PDF available for a particular patent
+        (e.g., recently filed patents, certain international patents, or
+        patents with restricted access). The returned count may be lower
+        than the requested number of results.
+    """
    # Make API call
    params = {
      "engine": "google_patents",
@@ -18,11 +33,14 @@ class SERP:
      "api_key": config.api_key,
    }
    search = serpapi.search(params)
-    # Convert data into a list of publicationID
+    # Convert results to Patent objects, skipping any without PDF links
    patent_ids = []
    list_of_patents = search["organic_results"]
    for patent in list_of_patents:
-        patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=patent["pdf"], summary=None))
+        pdf_link = patent.get("pdf")
+        if pdf_link:
+            patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None))
+        # Patents without PDF links are skipped (see docstring for details)

    return Patents(patents=patent_ids)