diff --git a/SPARC/serp_api.py b/SPARC/serp_api.py index 05120e4..5959c08 100644 --- a/SPARC/serp_api.py +++ b/SPARC/serp_api.py @@ -8,6 +8,21 @@ from SPARC.types import Patents, Patent class SERP: def query(company: str) -> Patents: + """Query Google Patents for a company's recent patents. + + Args: + company: Name of the company to search for + + Returns: + Patents object containing list of patents with PDF links + + Note: + Patents without PDF download links are skipped. This occurs when + Google Patents doesn't have a PDF available for a particular patent + (e.g., recently filed patents, certain international patents, or + patents with restricted access). The returned count may be lower + than the requested number of results. + """ # Make API call params = { "engine": "google_patents", @@ -18,11 +33,14 @@ class SERP: "api_key": config.api_key, } search = serpapi.search(params) - # Convert data into a list of publicationID + # Convert results to Patent objects, skipping any without PDF links patent_ids = [] list_of_patents = search["organic_results"] for patent in list_of_patents: - patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=patent["pdf"], summary=None)) + pdf_link = patent.get("pdf") + if pdf_link: + patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None)) + # Patents without PDF links are skipped (see docstring for details) return Patents(patents=patent_ids)