2026-03-31 11:53:47 +00:00
3 changed files with 4762 additions and 5 deletions
@@ -54,6 +54,21 @@ docker-compose up -d
 # - API Docs: http://localhost:8000/docs
 ```

+#### Patent PDF Storage
+
+The API stores downloaded patent PDFs in a `patents/` directory. In Docker,
+this directory is bind-mounted (`./patents:/app/patents`) so that PDFs persist
+across container restarts.
+
+If you deploy to a different environment, ensure the `patents/` directory is a
+persistent volume. Without it, stored PDFs are lost and must be re-downloaded.
+
+```yaml
+# docker-compose.yml excerpt
+volumes:
+  - ./patents:/app/patents
+```
+
 ### NixOS

 ```bash
@@ -104,21 +104,33 @@ class CompanyAnalyzer:
    def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
        """Analyze a single patent by ID.

-        Useful for focused analysis of specific innovations.
+        Prerequisite:
+            The patent PDF must already exist at ``patents/{patent_id}.pdf``
+            before calling this method. PDFs are downloaded automatically when
+            using the batch analysis pipeline (``analyze_company`` or the
+            ``/analyze/batch`` API endpoint). For standalone usage, download
+            the PDF manually or call ``SERP.save_patents()`` first.

        Args:
-          patent_id: Publication ID of the patent
+          patent_id: Publication ID of the patent (e.g. "US-11234567-B2")
          company_name: Name of the company (for context)

        Returns:
          Analysis of the specific patent's innovation quality
+
+        Raises:
+          FileNotFoundError: If the patent PDF is not found at the expected path.
        """
-        # Note: This simplified version assumes the patent PDF is already downloaded
-        # A more complete implementation would support direct patent ID lookup
-        print(f"Analyzing patent {patent_id} for {company_name}...")
+        import os

        patent_path = f"patents/{patent_id}.pdf"

+        if not os.path.exists(patent_path):
+            raise FileNotFoundError(
+                f"Patent PDF not found at '{patent_path}'. "
+                f"Download the PDF first using SERP.save_patents() or the batch analysis pipeline."
+            )
+
        try:
            sections = SERP.parse_patent_pdf(patent_path)
            minimized_content = SERP.minimize_patent_for_llm(sections)
@@ -129,6 +141,8 @@ class CompanyAnalyzer:

            return analysis

+        except FileNotFoundError:
+            raise
        except Exception as e:
            return f"Failed to analyze patent {patent_id}: {e}"