From 0b4d712fc5a7e06fd0e5cb9858327e51e7b19987 Mon Sep 17 00:00:00 2001 From: agent-company Date: Thu, 26 Mar 2026 10:07:07 +0000 Subject: [PATCH] feat: add structured logging to serp_api.py Add module-level logger to serp_api.py with INFO-level messages for patent queries and PDF downloads, and DEBUG-level messages for cache hits and parsing details. All three target files (analyzer.py, serp_api.py, llm.py) now use structured logging with no print() calls. Closes leeworks-agents/SPARC#46 Co-Authored-By: Claude Opus 4.6 (1M context) --- SPARC/serp_api.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/SPARC/serp_api.py b/SPARC/serp_api.py index cb6a8af..620cfd4 100644 --- a/SPARC/serp_api.py +++ b/SPARC/serp_api.py @@ -1,3 +1,4 @@ +import logging import os import re from datetime import datetime, timedelta @@ -10,6 +11,8 @@ import serpapi from SPARC import config from SPARC.types import Patent, Patents +logger = logging.getLogger(__name__) + class SERP: def query(company: str, days_back: int = None) -> Patents: @@ -44,6 +47,7 @@ class SERP: "tbs": date_filter, "api_key": config.api_key, } + logger.info("Querying Google Patents for '%s' (last %d days)", company, days_back) search = serpapi.search(params) # Convert results to Patent objects, skipping any without PDF links patent_ids = [] @@ -52,8 +56,10 @@ class SERP: pdf_link = patent.get("pdf") if pdf_link: patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None)) - # Patents without PDF links are skipped (see docstring for details) + else: + logger.debug("Skipping patent %s (no PDF link)", patent.get("publication_number", "unknown")) + logger.info("Found %d patents with PDF links for '%s'", len(patent_ids), company) return Patents(patents=patent_ids) def save_patents(patent: Patent) -> Patent: @@ -70,9 +76,13 @@ class SERP: os.makedirs("patents", exist_ok=True) if not (os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0): + logger.info("Downloading PDF for %s", patent.patent_id) response = requests.get(patent.pdf_link) with open(pdf_path, "wb") as f: f.write(response.content) + logger.debug("Saved %d bytes to %s", len(response.content), pdf_path) + else: + logger.debug("Using cached PDF for %s at %s", patent.patent_id, pdf_path) patent.pdf_path = pdf_path return patent @@ -90,11 +100,13 @@ class SERP: Dictionary containing all extracted sections """ + logger.debug("Parsing patent PDF: %s", pdf_path) with pdfplumber.open(pdf_path) as pdf: # Extract all text full_text = "" for page in pdf.pages: full_text += page.extract_text() + "\n" + logger.debug("Extracted text from %d pages (%d chars)", len(pdf.pages), len(full_text)) # Define section patterns (common in patents) sections = {