Compare commits

..

1 Commits

Author SHA1 Message Date
agent-company b000146585 feat: configurable LLM model, SERP cache TTL, structured logging, fix patent_id type
- Make LLM model configurable via MODEL env var, default anthropic/claude-3.5-sonnet (#12)
- Expose SERP cache TTL as SERP_CACHE_TTL_HOURS env var, default 24 hours (#13)
- Fix Patent.patent_id type annotation from int to str in types.py (#14)
- Replace all print() calls with structured logging in analyzer.py and llm.py (#11)
- Add LOG_LEVEL config with basicConfig setup in config.py
- Add model and serp_cache_ttl_hours to config.py

Closes leeworks-agents/SPARC#11
Closes leeworks-agents/SPARC#12
Closes leeworks-agents/SPARC#13
Closes leeworks-agents/SPARC#14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 06:03:25 +00:00
14 changed files with 67 additions and 149 deletions
-37
View File
@@ -9,43 +9,7 @@ on:
workflow_dispatch: workflow_dispatch:
jobs: jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Install system dependencies
shell: sh
run: |
apk add --no-cache git python3 py3-pip gcc musl-dev libpq-dev python3-dev
- name: Checkout code
shell: sh
run: |
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
git checkout ${{ gitea.sha }}
- name: Install Python dependencies
shell: sh
run: |
pip3 install --break-system-packages -r requirements.txt ruff
- name: Run ruff linter
shell: sh
run: |
ruff check SPARC/ tests/
- name: Run pytest
shell: sh
env:
DATABASE_URL: "sqlite://"
API_KEY: "test-key"
OPENROUTER_API_KEY: "test-key"
JWT_SECRET: "test-secret-for-ci"
APP_ENV: "development"
run: |
python3 -m pytest tests/ -v --tb=short -x
build-api: build-api:
needs: test
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Install dependencies - name: Install dependencies
@@ -117,7 +81,6 @@ jobs:
echo "API image available at ${{ steps.tags.outputs.IMAGE_TAG }}" echo "API image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
build-frontend: build-frontend:
needs: test
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Install dependencies - name: Install dependencies
-46
View File
@@ -1,46 +0,0 @@
name: Test and Lint
on:
push:
branches:
- main
pull_request:
branches:
- main
workflow_dispatch:
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Install system dependencies
shell: sh
run: |
apk add --no-cache git python3 py3-pip gcc musl-dev libpq-dev python3-dev
- name: Checkout code
shell: sh
run: |
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
git checkout ${{ gitea.sha }}
- name: Install Python dependencies
shell: sh
run: |
pip3 install --break-system-packages -r requirements.txt ruff
- name: Run ruff linter
shell: sh
run: |
ruff check SPARC/ tests/
- name: Run pytest
shell: sh
env:
DATABASE_URL: "sqlite://"
API_KEY: "test-key"
OPENROUTER_API_KEY: "test-key"
JWT_SECRET: "test-secret-for-ci"
APP_ENV: "development"
run: |
python3 -m pytest tests/ -v --tb=short -x
+2 -3
View File
@@ -1,4 +1,3 @@
from .types import Patent as Patent from .types import Patents, Patent
from .types import Patents as Patents
__all__ = ["Patents", "Patent"] all = ["Patents", "Patent"]
+23 -18
View File
@@ -5,14 +5,17 @@ to provide company performance estimation based on patent portfolios.
""" """
import hashlib import hashlib
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable from typing import Callable
from SPARC import config from SPARC import config
logger = logging.getLogger(__name__)
from SPARC.database import DatabaseClient from SPARC.database import DatabaseClient
from SPARC.llm import LLMAnalyzer
from SPARC.serp_api import SERP from SPARC.serp_api import SERP
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult, Patent, Patents from SPARC.llm import LLMAnalyzer
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
class CompanyAnalyzer: class CompanyAnalyzer:
@@ -52,13 +55,13 @@ class CompanyAnalyzer:
query_hash = hashlib.sha256(company_name.lower().encode()).hexdigest() query_hash = hashlib.sha256(company_name.lower().encode()).hexdigest()
cached_ids = self.db.get_cached_serp_query(query_hash) cached_ids = self.db.get_cached_serp_query(query_hash)
if cached_ids is not None: if cached_ids is not None:
print(f"Using cached SERP results for {company_name} ({len(cached_ids)} patents)") logger.info("Using cached SERP results for %s (%d patents)", company_name, len(cached_ids))
patents = Patents(patents=[ patents = Patents(patents=[
Patent(patent_id=pid, pdf_link="") Patent(patent_id=pid, pdf_link="")
for pid in cached_ids for pid in cached_ids
]) ])
else: else:
print(f"Retrieving patents for {company_name}...") logger.info("Retrieving patents for %s...", company_name)
patents = SERP.query(company_name) patents = SERP.query(company_name)
# Cache the SERP results # Cache the SERP results
if patents.patents: if patents.patents:
@@ -66,12 +69,13 @@ class CompanyAnalyzer:
company_name=company_name, company_name=company_name,
query_hash=query_hash, query_hash=query_hash,
patent_ids=[p.patent_id for p in patents.patents], patent_ids=[p.patent_id for p in patents.patents],
ttl_hours=config.serp_cache_ttl_hours,
) )
if not patents.patents: if not patents.patents:
return f"No patents found for {company_name}" return f"No patents found for {company_name}"
print(f"Found {len(patents.patents)} patents. Processing...") logger.info("Found %d patents. Processing...", len(patents.patents))
# Download, parse, and minimize patents in parallel # Download, parse, and minimize patents in parallel
processed_patents = [] processed_patents = []
@@ -87,12 +91,12 @@ class CompanyAnalyzer:
if result: if result:
processed_patents.append(result) processed_patents.append(result)
except Exception as e: except Exception as e:
print(f"Warning: Failed to process {patent.patent_id}: {e}") logger.warning("Failed to process %s: %s", patent.patent_id, e)
if not processed_patents: if not processed_patents:
return f"Failed to process any patents for {company_name}" return f"Failed to process any patents for {company_name}"
print("Analyzing portfolio with LLM...") logger.info("Analyzing portfolio with LLM...")
# Analyze the full portfolio with LLM # Analyze the full portfolio with LLM
analysis = self.llm_analyzer.analyze_patent_portfolio( analysis = self.llm_analyzer.analyze_patent_portfolio(
@@ -122,6 +126,7 @@ class CompanyAnalyzer:
FileNotFoundError: If the patent PDF is not found at the expected path. FileNotFoundError: If the patent PDF is not found at the expected path.
""" """
import os import os
logger.info("Analyzing patent %s for %s...", patent_id, company_name)
patent_path = f"patents/{patent_id}.pdf" patent_path = f"patents/{patent_id}.pdf"
@@ -183,7 +188,7 @@ class CompanyAnalyzer:
return {"patent_id": patent.patent_id, "content": minimized_content} return {"patent_id": patent.patent_id, "content": minimized_content}
except Exception as e: except Exception as e:
print(f"Warning: Failed to process {patent.patent_id}: {e}") logger.warning("Failed to process %s: %s", patent.patent_id, e)
return None return None
def _analyze_company_safe(self, company_name: str) -> CompanyAnalysisResult: def _analyze_company_safe(self, company_name: str) -> CompanyAnalysisResult:
@@ -254,7 +259,7 @@ class CompanyAnalyzer:
results: list[CompanyAnalysisResult] = [] results: list[CompanyAnalysisResult] = []
total = len(companies) total = len(companies)
print(f"Starting batch analysis of {total} companies...") logger.info("Starting batch analysis of %d companies...", total)
with ThreadPoolExecutor(max_workers=max_workers) as executor: with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_company = { future_to_company = {
@@ -271,8 +276,8 @@ class CompanyAnalyzer:
result = future.result() result = future.result()
results.append(result) results.append(result)
status = "" if result.success else "" status = "OK" if result.success else "FAIL"
print(f"[{completed}/{total}] {status} {company}") logger.info("[%d/%d] %s %s", completed, total, status, company)
if progress_callback: if progress_callback:
progress_callback(company, completed, total) progress_callback(company, completed, total)
@@ -287,12 +292,12 @@ class CompanyAnalyzer:
error=str(e), error=str(e),
) )
) )
print(f"[{completed}/{total}] ✗ {company}: {e}") logger.error("[%d/%d] FAIL %s: %s", completed, total, company, e)
successful = sum(1 for r in results if r.success) successful = sum(1 for r in results if r.success)
failed = total - successful failed = total - successful
print(f"\nBatch complete: {successful} succeeded, {failed} failed") logger.info("Batch complete: %d succeeded, %d failed", successful, failed)
return BatchAnalysisResult( return BatchAnalysisResult(
results=results, results=results,
@@ -318,20 +323,20 @@ class CompanyAnalyzer:
results: list[CompanyAnalysisResult] = [] results: list[CompanyAnalysisResult] = []
total = len(companies) total = len(companies)
print(f"Starting sequential analysis of {total} companies...") logger.info("Starting sequential analysis of %d companies...", total)
for idx, company in enumerate(companies, 1): for idx, company in enumerate(companies, 1):
print(f"\n[{idx}/{total}] Analyzing {company}...") logger.info("[%d/%d] Analyzing %s...", idx, total, company)
result = self._analyze_company_safe(company) result = self._analyze_company_safe(company)
results.append(result) results.append(result)
status = "" if result.success else "" status = "OK" if result.success else "FAIL"
print(f"[{idx}/{total}] {status} {company}") logger.info("[%d/%d] %s %s", idx, total, status, company)
successful = sum(1 for r in results if r.success) successful = sum(1 for r in results if r.success)
failed = total - successful failed = total - successful
print(f"\nBatch complete: {successful} succeeded, {failed} failed") logger.info("Batch complete: %d succeeded, %d failed", successful, failed)
return BatchAnalysisResult( return BatchAnalysisResult(
results=results, results=results,
+14
View File
@@ -2,12 +2,20 @@
Loads environment variables from .env file for API keys and other secrets. Loads environment variables from .env file for API keys and other secrets.
""" """
import logging
import os import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
# Logging configuration
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
level=getattr(logging, log_level, logging.INFO),
format="%(asctime)s %(levelname)s %(name)s %(message)s",
)
# SerpAPI key for patent search # SerpAPI key for patent search
api_key = os.getenv("API_KEY") api_key = os.getenv("API_KEY")
@@ -31,6 +39,12 @@ use_database = os.getenv("USE_DATABASE", "false").lower() in ("true", "1", "yes"
patent_search_days = int(os.getenv("PATENT_SEARCH_DAYS", "90")) patent_search_days = int(os.getenv("PATENT_SEARCH_DAYS", "90"))
patent_thread_workers = int(os.getenv("PATENT_THREAD_WORKERS", "5")) patent_thread_workers = int(os.getenv("PATENT_THREAD_WORKERS", "5"))
# LLM model to use via OpenRouter (e.g. "anthropic/claude-3.5-sonnet", "openai/gpt-4o")
model = os.getenv("MODEL", "anthropic/claude-3.5-sonnet")
# SERP cache TTL in hours (how long cached search results are considered fresh)
serp_cache_ttl_hours = int(os.getenv("SERP_CACHE_TTL_HOURS", "24"))
# Root path for running behind a reverse proxy (e.g., "/api" when served at /api/) # Root path for running behind a reverse proxy (e.g., "/api" when served at /api/)
# This ensures OpenAPI docs work correctly when accessed via the proxy # This ensures OpenAPI docs work correctly when accessed via the proxy
root_path = os.getenv("ROOT_PATH", "") root_path = os.getenv("ROOT_PATH", "")
+6 -7
View File
@@ -1,15 +1,14 @@
"""Database client for storing and retrieving LLM messages and user authentication.""" """Database client for storing and retrieving LLM messages and user authentication."""
import contextlib import contextlib
import hashlib
import json
from datetime import datetime, timedelta
from typing import Dict, List, Optional
import bcrypt
import psycopg2 import psycopg2
from psycopg2.extras import RealDictCursor
from psycopg2.pool import ThreadedConnectionPool from psycopg2.pool import ThreadedConnectionPool
from psycopg2.extras import RealDictCursor
from typing import Dict, List, Optional
from datetime import datetime, timedelta
import json
import hashlib
import bcrypt
class DatabaseClient: class DatabaseClient:
+6 -7
View File
@@ -1,5 +1,6 @@
"""LLM integration for patent analysis using OpenRouter.""" """LLM integration for patent analysis using OpenRouter."""
import logging
from typing import Dict from typing import Dict
from openai import OpenAI from openai import OpenAI
@@ -7,6 +8,8 @@ from openai import OpenAI
from SPARC import config from SPARC import config
from SPARC.database import DatabaseClient from SPARC.database import DatabaseClient
logger = logging.getLogger(__name__)
class LLMAnalyzer: class LLMAnalyzer:
"""Handles LLM-based analysis of patent content.""" """Handles LLM-based analysis of patent content."""
@@ -22,7 +25,7 @@ class LLMAnalyzer:
""" """
self.test_mode = test_mode self.test_mode = test_mode
self.use_cache = use_cache if use_cache is not None else config.use_cache self.use_cache = use_cache if use_cache is not None else config.use_cache
self.model = "anthropic/claude-3.5-sonnet" self.model = config.model
# Always initialize database client for storage and caching # Always initialize database client for storage and caching
self.db_client = DatabaseClient(config.database_url) self.db_client = DatabaseClient(config.database_url)
@@ -61,11 +64,7 @@ Patent Content:
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage.""" Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
if self.test_mode: if self.test_mode:
print("=" * 80) logger.debug("TEST MODE - Prompt that would be sent to LLM:\n%s", prompt)
print("TEST MODE - Prompt that would be sent to LLM:")
print("=" * 80)
print(prompt)
print("=" * 80)
return "[TEST MODE - No API call made]" return "[TEST MODE - No API call made]"
# Check cache first # Check cache first
@@ -167,7 +166,7 @@ Patent Portfolio:
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook.""" Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
if self.test_mode: if self.test_mode:
print(prompt) logger.debug("TEST MODE - Portfolio prompt:\n%s", prompt)
return "[TEST MODE]" return "[TEST MODE]"
metadata = { metadata = {
+5 -8
View File
@@ -1,15 +1,12 @@
import os import os
import serpapi
from SPARC import config
import re import re
from datetime import datetime, timedelta
from typing import Dict
import pdfplumber # pip install pdfplumber import pdfplumber # pip install pdfplumber
import requests import requests
import serpapi from datetime import datetime, timedelta
from typing import Dict
from SPARC import config from SPARC.types import Patents, Patent
from SPARC.types import Patent, Patents
class SERP: class SERP:
def query(company: str, days_back: int = None) -> Patents: def query(company: str, days_back: int = None) -> Patents:
+1 -1
View File
@@ -4,7 +4,7 @@ from datetime import datetime
@dataclass @dataclass
class Patent: class Patent:
patent_id: int patent_id: str
pdf_link: str pdf_link: str
pdf_path: str | None = None pdf_path: str | None = None
summary: dict | None = None summary: dict | None = None
-8
View File
@@ -1,8 +0,0 @@
[lint]
select = ["E", "F", "I"]
ignore = [
"E501", # line too long (handled by formatter)
]
[lint.per-file-ignores]
"tests/*" = ["E402", "F841"] # allow import not at top of file, unused vars (mocks) in tests
+3 -5
View File
@@ -1,11 +1,9 @@
"""Tests for the high-level company analyzer orchestration.""" """Tests for the high-level company analyzer orchestration."""
from unittest.mock import MagicMock, Mock
import pytest import pytest
from unittest.mock import Mock, patch, call, MagicMock
from SPARC.analyzer import CompanyAnalyzer from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import BatchAnalysisResult, Patent, Patents from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
@@ -26,7 +24,7 @@ class TestCompanyAnalyzer:
"""Test analyzer initialization with API key.""" """Test analyzer initialization with API key."""
mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer") mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
_analyzer = CompanyAnalyzer(openrouter_api_key="test-key") # noqa: F841 analyzer = CompanyAnalyzer(openrouter_api_key="test-key")
mock_llm.assert_called_once_with(api_key="test-key") mock_llm.assert_called_once_with(api_key="test-key")
+3 -4
View File
@@ -1,13 +1,12 @@
"""Tests for FastAPI web service endpoints.""" """Tests for FastAPI web service endpoints."""
from datetime import datetime
from unittest.mock import Mock
import pytest import pytest
from datetime import datetime
from unittest.mock import Mock, patch
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from SPARC.api import app from SPARC.api import app
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult from SPARC.types import CompanyAnalysisResult, BatchAnalysisResult
@pytest.fixture @pytest.fixture
+1 -3
View File
@@ -1,9 +1,7 @@
"""Tests for LLM analysis functionality.""" """Tests for LLM analysis functionality."""
from unittest.mock import Mock
import pytest import pytest
from unittest.mock import Mock, MagicMock, patch
from SPARC.llm import LLMAnalyzer from SPARC.llm import LLMAnalyzer
+3 -2
View File
@@ -1,8 +1,9 @@
"""Tests for SERP API patent retrieval and parsing functionality.""" """Tests for SERP API patent retrieval and parsing functionality."""
import os
import pytest
from unittest.mock import patch, Mock
from datetime import datetime, timedelta from datetime import datetime, timedelta
from unittest.mock import Mock
from SPARC.serp_api import SERP from SPARC.serp_api import SERP
from SPARC.types import Patent from SPARC.types import Patent