Compare commits
5 Commits
490850d7a6
...
3424384088
| Author | SHA1 | Date | |
|---|---|---|---|
| | 3424384088 | | |
| | 5141d9dd47 | | |
| | 4e419166e8 | | |
| | 7eb72ab549 | | |
| | d371ceeec8 | | |
+15
-2
@@ -1,12 +1,25 @@
|
||||
FROM python:3.14-alpine3.23
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for pdfplumber and psycopg2
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libpq-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
CMD ["python3", "main.py"]
|
||||
# Create patents directory for PDF storage
|
||||
RUN mkdir -p /app/patents
|
||||
|
||||
# Expose ports for API and Dashboard
|
||||
EXPOSE 8000 8501
|
||||
|
||||
# Default command runs the API (can be overridden in docker-compose)
|
||||
CMD ["uvicorn", "SPARC.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
|
||||
@@ -34,7 +34,25 @@ SPARC/
|
||||
|
||||
## Installation
|
||||
|
||||
### NixOS (Recommended)
|
||||
### Docker (Recommended)
|
||||
|
||||
```bash
|
||||
# Clone and configure
|
||||
git clone <repository-url>
|
||||
cd SPARC
|
||||
cp .env.example .env
|
||||
# Edit .env with your API keys
|
||||
|
||||
# Start all services (API, Dashboard, PostgreSQL)
|
||||
docker-compose up -d
|
||||
|
||||
# Access the services
|
||||
# - API: http://localhost:8000
|
||||
# - Dashboard: http://localhost:8501
|
||||
# - API Docs: http://localhost:8000/docs
|
||||
```
|
||||
|
||||
### NixOS
|
||||
|
||||
```bash
|
||||
nix develop
|
||||
@@ -262,4 +280,4 @@ For open source projects, say how it is licensed.
|
||||
|
||||
Core functionality complete. Ready for production use with API keys configured.
|
||||
|
||||
Next steps: API wrapper, containerization, and multi-company support.
|
||||
All major features implemented: REST API, Streamlit dashboard, Docker containerization, database storage, and multi-company batch processing.
|
||||
|
||||
+20
-2
@@ -8,6 +8,21 @@ from SPARC.types import Patents, Patent
|
||||
|
||||
class SERP:
|
||||
def query(company: str) -> Patents:
|
||||
"""Query Google Patents for a company's recent patents.
|
||||
|
||||
Args:
|
||||
company: Name of the company to search for
|
||||
|
||||
Returns:
|
||||
Patents object containing list of patents with PDF links
|
||||
|
||||
Note:
|
||||
Patents without PDF download links are skipped. This occurs when
|
||||
Google Patents doesn't have a PDF available for a particular patent
|
||||
(e.g., recently filed patents, certain international patents, or
|
||||
patents with restricted access). The returned count may be lower
|
||||
than the requested number of results.
|
||||
"""
|
||||
# Make API call
|
||||
params = {
|
||||
"engine": "google_patents",
|
||||
@@ -18,11 +33,14 @@ class SERP:
|
||||
"api_key": config.api_key,
|
||||
}
|
||||
search = serpapi.search(params)
|
||||
# Convert data into a list of publicationID
|
||||
# Convert results to Patent objects, skipping any without PDF links
|
||||
patent_ids = []
|
||||
list_of_patents = search["organic_results"]
|
||||
for patent in list_of_patents:
|
||||
patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=patent["pdf"], summary=None))
|
||||
pdf_link = patent.get("pdf")
|
||||
if pdf_link:
|
||||
patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None))
|
||||
# Patents without PDF links are skipped (see docstring for details)
|
||||
|
||||
return Patents(patents=patent_ids)
|
||||
|
||||
|
||||
+507
-91
@@ -17,11 +17,304 @@ from SPARC import config
|
||||
|
||||
st.set_page_config(
|
||||
page_title="SPARC Dashboard",
|
||||
page_icon="📊",
|
||||
page_icon="⚡",
|
||||
layout="wide",
|
||||
initial_sidebar_state="expanded",
|
||||
initial_sidebar_state="collapsed",
|
||||
)
|
||||
|
||||
# Modern CSS styling
|
||||
st.markdown("""
|
||||
<style>
|
||||
/* Hide default Streamlit elements */
|
||||
#MainMenu {visibility: hidden;}
|
||||
footer {visibility: hidden;}
|
||||
header {visibility: hidden;}
|
||||
|
||||
/* Root variables for theming */
|
||||
:root {
|
||||
--primary: #6366f1;
|
||||
--primary-dark: #4f46e5;
|
||||
--secondary: #0ea5e9;
|
||||
--success: #10b981;
|
||||
--warning: #f59e0b;
|
||||
--error: #ef4444;
|
||||
--bg-dark: #0f172a;
|
||||
--bg-card: #1e293b;
|
||||
--bg-card-hover: #334155;
|
||||
--text-primary: #f8fafc;
|
||||
--text-secondary: #94a3b8;
|
||||
--border: #334155;
|
||||
}
|
||||
|
||||
/* Main app background */
|
||||
.stApp {
|
||||
background: linear-gradient(135deg, #0f172a 0%, #1e1b4b 100%);
|
||||
}
|
||||
|
||||
/* Top navigation bar */
|
||||
.nav-container {
|
||||
background: rgba(30, 41, 59, 0.8);
|
||||
backdrop-filter: blur(12px);
|
||||
border-bottom: 1px solid rgba(99, 102, 241, 0.2);
|
||||
padding: 1rem 2rem;
|
||||
margin: -1rem -1rem 2rem -1rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.nav-brand {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.nav-brand h1 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
background: linear-gradient(135deg, #6366f1, #0ea5e9);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.nav-brand span {
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-secondary);
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.1em;
|
||||
}
|
||||
|
||||
/* Card styling */
|
||||
.modern-card {
|
||||
background: rgba(30, 41, 59, 0.6);
|
||||
backdrop-filter: blur(8px);
|
||||
border: 1px solid rgba(99, 102, 241, 0.15);
|
||||
border-radius: 16px;
|
||||
padding: 1.5rem;
|
||||
margin-bottom: 1rem;
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
.modern-card:hover {
|
||||
border-color: rgba(99, 102, 241, 0.4);
|
||||
box-shadow: 0 8px 32px rgba(99, 102, 241, 0.15);
|
||||
}
|
||||
|
||||
/* Metric cards */
|
||||
.metric-card {
|
||||
background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(14, 165, 233, 0.1));
|
||||
border: 1px solid rgba(99, 102, 241, 0.2);
|
||||
border-radius: 12px;
|
||||
padding: 1.25rem;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.metric-value {
|
||||
font-size: 2rem;
|
||||
font-weight: 700;
|
||||
background: linear-gradient(135deg, #6366f1, #0ea5e9);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
}
|
||||
|
||||
.metric-label {
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
/* Section headers */
|
||||
.section-header {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 1rem;
|
||||
padding-bottom: 0.5rem;
|
||||
border-bottom: 2px solid rgba(99, 102, 241, 0.3);
|
||||
}
|
||||
|
||||
/* Input fields */
|
||||
.stTextInput > div > div > input,
|
||||
.stTextArea > div > div > textarea {
|
||||
background: rgba(30, 41, 59, 0.8) !important;
|
||||
border: 1px solid rgba(99, 102, 241, 0.3) !important;
|
||||
border-radius: 10px !important;
|
||||
color: var(--text-primary) !important;
|
||||
padding: 0.75rem 1rem !important;
|
||||
}
|
||||
|
||||
.stTextInput > div > div > input:focus,
|
||||
.stTextArea > div > div > textarea:focus {
|
||||
border-color: var(--primary) !important;
|
||||
box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important;
|
||||
}
|
||||
|
||||
/* Buttons */
|
||||
.stButton > button {
|
||||
background: linear-gradient(135deg, #6366f1, #4f46e5) !important;
|
||||
color: white !important;
|
||||
border: none !important;
|
||||
border-radius: 10px !important;
|
||||
padding: 0.75rem 1.5rem !important;
|
||||
font-weight: 600 !important;
|
||||
transition: all 0.3s ease !important;
|
||||
box-shadow: 0 4px 14px rgba(99, 102, 241, 0.3) !important;
|
||||
}
|
||||
|
||||
.stButton > button:hover {
|
||||
transform: translateY(-2px) !important;
|
||||
box-shadow: 0 6px 20px rgba(99, 102, 241, 0.4) !important;
|
||||
}
|
||||
|
||||
/* Tabs styling */
|
||||
.stTabs [data-baseweb="tab-list"] {
|
||||
background: rgba(30, 41, 59, 0.6);
|
||||
border-radius: 12px;
|
||||
padding: 0.5rem;
|
||||
gap: 0.5rem;
|
||||
border: 1px solid rgba(99, 102, 241, 0.15);
|
||||
}
|
||||
|
||||
.stTabs [data-baseweb="tab"] {
|
||||
background: transparent;
|
||||
border-radius: 8px;
|
||||
color: var(--text-secondary);
|
||||
padding: 0.75rem 1.5rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.stTabs [aria-selected="true"] {
|
||||
background: linear-gradient(135deg, #6366f1, #4f46e5) !important;
|
||||
color: white !important;
|
||||
}
|
||||
|
||||
.stTabs [data-baseweb="tab-border"] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.stTabs [data-baseweb="tab-highlight"] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
/* Expander styling */
|
||||
.streamlit-expanderHeader {
|
||||
background: rgba(30, 41, 59, 0.6) !important;
|
||||
border: 1px solid rgba(99, 102, 241, 0.15) !important;
|
||||
border-radius: 10px !important;
|
||||
color: var(--text-primary) !important;
|
||||
}
|
||||
|
||||
.streamlit-expanderContent {
|
||||
background: rgba(30, 41, 59, 0.4) !important;
|
||||
border: 1px solid rgba(99, 102, 241, 0.1) !important;
|
||||
border-top: none !important;
|
||||
border-radius: 0 0 10px 10px !important;
|
||||
}
|
||||
|
||||
/* Slider */
|
||||
.stSlider > div > div > div {
|
||||
background: var(--primary) !important;
|
||||
}
|
||||
|
||||
/* Select box */
|
||||
.stSelectbox > div > div {
|
||||
background: rgba(30, 41, 59, 0.8) !important;
|
||||
border: 1px solid rgba(99, 102, 241, 0.3) !important;
|
||||
border-radius: 10px !important;
|
||||
}
|
||||
|
||||
/* Progress bar */
|
||||
.stProgress > div > div > div {
|
||||
background: linear-gradient(90deg, #6366f1, #0ea5e9) !important;
|
||||
}
|
||||
|
||||
/* Alerts */
|
||||
.stAlert {
|
||||
border-radius: 10px !important;
|
||||
border: none !important;
|
||||
}
|
||||
|
||||
/* Metrics */
|
||||
[data-testid="stMetricValue"] {
|
||||
background: linear-gradient(135deg, #6366f1, #0ea5e9);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
[data-testid="stMetricLabel"] {
|
||||
color: var(--text-secondary) !important;
|
||||
}
|
||||
|
||||
/* Plotly charts */
|
||||
.js-plotly-plot {
|
||||
border-radius: 12px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
/* Status badges */
|
||||
.status-badge {
|
||||
display: inline-block;
|
||||
padding: 0.25rem 0.75rem;
|
||||
border-radius: 9999px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.status-success {
|
||||
background: rgba(16, 185, 129, 0.2);
|
||||
color: #10b981;
|
||||
border: 1px solid rgba(16, 185, 129, 0.3);
|
||||
}
|
||||
|
||||
.status-warning {
|
||||
background: rgba(245, 158, 11, 0.2);
|
||||
color: #f59e0b;
|
||||
border: 1px solid rgba(245, 158, 11, 0.3);
|
||||
}
|
||||
|
||||
.status-error {
|
||||
background: rgba(239, 68, 68, 0.2);
|
||||
color: #ef4444;
|
||||
border: 1px solid rgba(239, 68, 68, 0.3);
|
||||
}
|
||||
|
||||
/* Dividers */
|
||||
hr {
|
||||
border: none;
|
||||
border-top: 1px solid rgba(99, 102, 241, 0.2);
|
||||
margin: 1.5rem 0;
|
||||
}
|
||||
|
||||
/* Info boxes */
|
||||
.info-box {
|
||||
background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(14, 165, 233, 0.05));
|
||||
border: 1px solid rgba(99, 102, 241, 0.2);
|
||||
border-radius: 12px;
|
||||
padding: 1rem 1.25rem;
|
||||
margin: 1rem 0;
|
||||
}
|
||||
|
||||
/* Feature list */
|
||||
.feature-item {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
gap: 0.75rem;
|
||||
padding: 0.75rem 0;
|
||||
border-bottom: 1px solid rgba(99, 102, 241, 0.1);
|
||||
}
|
||||
|
||||
.feature-icon {
|
||||
color: var(--primary);
|
||||
font-size: 1.25rem;
|
||||
}
|
||||
</style>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
|
||||
@st.cache_resource
|
||||
def get_analyzer():
|
||||
@@ -43,37 +336,44 @@ def get_db_client():
|
||||
|
||||
|
||||
def render_header():
|
||||
"""Render the dashboard header."""
|
||||
st.title("SPARC Dashboard")
|
||||
st.markdown("**Semiconductor Patent & Analytics Report Core**")
|
||||
st.markdown("---")
|
||||
"""Render the modern dashboard header."""
|
||||
st.markdown("""
|
||||
<div class="nav-container">
|
||||
<div class="nav-brand">
|
||||
<h1>⚡ SPARC</h1>
|
||||
<span>Semiconductor Patent Analytics</span>
|
||||
</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
|
||||
def render_sidebar():
|
||||
"""Render the sidebar with navigation and controls."""
|
||||
st.sidebar.title("Navigation")
|
||||
page = st.sidebar.radio(
|
||||
"Select Page",
|
||||
["Company Analysis", "Batch Analysis", "Analytics", "About"],
|
||||
)
|
||||
return page
|
||||
def render_navigation():
|
||||
"""Render horizontal tab navigation at the top."""
|
||||
tabs = st.tabs(["🔍 Company Analysis", "📦 Batch Analysis", "📊 Analytics", "ℹ️ About"])
|
||||
return tabs
|
||||
|
||||
|
||||
def render_company_analysis():
|
||||
"""Render single company analysis page."""
|
||||
st.header("Company Patent Analysis")
|
||||
st.markdown('<p class="section-header">Single Company Analysis</p>', unsafe_allow_html=True)
|
||||
st.markdown("Analyze a company's patent portfolio using AI-powered insights.")
|
||||
|
||||
col1, col2 = st.columns([2, 1])
|
||||
st.markdown("")
|
||||
|
||||
# Search card
|
||||
with st.container():
|
||||
col1, col2 = st.columns([3, 1])
|
||||
|
||||
with col1:
|
||||
company_name = st.text_input(
|
||||
"Company Name",
|
||||
placeholder="e.g., nvidia, intel, amd",
|
||||
placeholder="Enter company name (e.g., nvidia, intel, amd)",
|
||||
help="Enter the company name to analyze their patent portfolio",
|
||||
label_visibility="collapsed",
|
||||
)
|
||||
|
||||
with col2:
|
||||
analyze_btn = st.button("Analyze", type="primary", use_container_width=True)
|
||||
analyze_btn = st.button("🔍 Analyze", type="primary", use_container_width=True)
|
||||
|
||||
if analyze_btn and company_name:
|
||||
with st.spinner(f"Analyzing {company_name}..."):
|
||||
@@ -81,45 +381,57 @@ def render_company_analysis():
|
||||
result = analyzer._analyze_company_safe(company_name)
|
||||
|
||||
if result.success:
|
||||
st.success(f"Analysis complete for {company_name}")
|
||||
st.success(f"✓ Analysis complete for {company_name.upper()}")
|
||||
|
||||
# Metrics row
|
||||
st.markdown("")
|
||||
|
||||
# Metrics row with custom styling
|
||||
col1, col2, col3 = st.columns(3)
|
||||
with col1:
|
||||
st.metric("Patents Analyzed", result.patent_count)
|
||||
st.metric("Patents Found", result.patent_count)
|
||||
with col2:
|
||||
st.metric("Status", "Success")
|
||||
st.metric("Analysis Status", "Complete")
|
||||
with col3:
|
||||
st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))
|
||||
|
||||
# Analysis content
|
||||
st.subheader("AI Analysis")
|
||||
st.markdown("")
|
||||
|
||||
# Analysis content in a styled container
|
||||
st.markdown('<p class="section-header">AI Analysis Results</p>', unsafe_allow_html=True)
|
||||
with st.container():
|
||||
st.markdown(result.analysis)
|
||||
|
||||
else:
|
||||
st.error(f"Analysis failed: {result.error}")
|
||||
|
||||
elif not company_name and analyze_btn:
|
||||
st.warning("Please enter a company name to analyze.")
|
||||
|
||||
|
||||
def render_batch_analysis():
|
||||
"""Render batch analysis page."""
|
||||
st.header("Batch Company Analysis")
|
||||
st.markdown('<p class="section-header">Batch Company Analysis</p>', unsafe_allow_html=True)
|
||||
st.markdown("Analyze multiple companies simultaneously for comparative insights.")
|
||||
|
||||
st.markdown(
|
||||
"Analyze multiple companies simultaneously. Enter company names separated by commas or newlines."
|
||||
)
|
||||
st.markdown("")
|
||||
|
||||
# Input section
|
||||
col1, col2 = st.columns([2, 1])
|
||||
|
||||
with col1:
|
||||
companies_input = st.text_area(
|
||||
"Company Names",
|
||||
placeholder="nvidia\namd\nintel\nqualcomm",
|
||||
placeholder="Enter company names (one per line or comma-separated):\nnvidia\namd\nintel\nqualcomm",
|
||||
height=150,
|
||||
label_visibility="collapsed",
|
||||
)
|
||||
|
||||
col1, col2 = st.columns(2)
|
||||
with col1:
|
||||
max_workers = st.slider("Concurrent Workers", 1, 5, 3)
|
||||
with col2:
|
||||
st.markdown("**Configuration**")
|
||||
max_workers = st.slider("Concurrent Workers", 1, 5, 3, help="Number of parallel analysis threads")
|
||||
st.markdown("")
|
||||
analyze_btn = st.button(
|
||||
"Run Batch Analysis", type="primary", use_container_width=True
|
||||
"🚀 Run Batch Analysis", type="primary", use_container_width=True
|
||||
)
|
||||
|
||||
if analyze_btn and companies_input:
|
||||
@@ -134,7 +446,7 @@ def render_batch_analysis():
|
||||
st.warning("Please enter at least one company name")
|
||||
return
|
||||
|
||||
st.info(f"Starting analysis of {len(companies)} companies...")
|
||||
st.info(f"🔄 Starting analysis of {len(companies)} companies...")
|
||||
|
||||
# Progress tracking
|
||||
progress_bar = st.progress(0)
|
||||
@@ -154,10 +466,12 @@ def render_batch_analysis():
|
||||
)
|
||||
|
||||
progress_bar.progress(1.0)
|
||||
status_text.text("Analysis complete!")
|
||||
status_text.text("✓ Analysis complete!")
|
||||
|
||||
st.markdown("")
|
||||
|
||||
# Summary metrics
|
||||
st.subheader("Results Summary")
|
||||
st.markdown('<p class="section-header">Results Summary</p>', unsafe_allow_html=True)
|
||||
col1, col2, col3, col4 = st.columns(4)
|
||||
with col1:
|
||||
st.metric("Total Companies", result.total_companies)
|
||||
@@ -178,7 +492,7 @@ def render_batch_analysis():
|
||||
df = pd.DataFrame(
|
||||
[
|
||||
{
|
||||
"Company": r.company_name,
|
||||
"Company": r.company_name.upper(),
|
||||
"Patents": r.patent_count,
|
||||
"Status": "Success" if r.success else "Failed",
|
||||
}
|
||||
@@ -191,16 +505,34 @@ def render_batch_analysis():
|
||||
x="Company",
|
||||
y="Patents",
|
||||
color="Status",
|
||||
color_discrete_map={"Success": "#28a745", "Failed": "#dc3545"},
|
||||
title="Patents per Company",
|
||||
color_discrete_map={"Success": "#10b981", "Failed": "#ef4444"},
|
||||
title="",
|
||||
)
|
||||
fig.update_layout(
|
||||
plot_bgcolor="rgba(0,0,0,0)",
|
||||
paper_bgcolor="rgba(0,0,0,0)",
|
||||
font_color="#94a3b8",
|
||||
legend=dict(
|
||||
orientation="h",
|
||||
yanchor="bottom",
|
||||
y=1.02,
|
||||
xanchor="right",
|
||||
x=1
|
||||
),
|
||||
xaxis=dict(showgrid=False),
|
||||
yaxis=dict(showgrid=True, gridcolor="rgba(99, 102, 241, 0.1)"),
|
||||
)
|
||||
st.plotly_chart(fig, use_container_width=True)
|
||||
|
||||
st.markdown("")
|
||||
|
||||
# Individual results
|
||||
st.subheader("Individual Results")
|
||||
st.markdown('<p class="section-header">Detailed Results</p>', unsafe_allow_html=True)
|
||||
for r in result.results:
|
||||
status_icon = "✓" if r.success else "✗"
|
||||
status_class = "status-success" if r.success else "status-error"
|
||||
with st.expander(
|
||||
f"{'✓' if r.success else '✗'} {r.company_name} ({r.patent_count} patents)"
|
||||
f"{status_icon} {r.company_name.upper()} — {r.patent_count} patents"
|
||||
):
|
||||
if r.success:
|
||||
st.markdown(r.analysis)
|
||||
@@ -210,21 +542,28 @@ def render_batch_analysis():
|
||||
|
||||
def render_analytics():
|
||||
"""Render analytics page with database insights."""
|
||||
st.header("Analytics Dashboard")
|
||||
st.markdown('<p class="section-header">Analytics Dashboard</p>', unsafe_allow_html=True)
|
||||
st.markdown("Track historical analysis data and view insights.")
|
||||
|
||||
db_client = get_db_client()
|
||||
|
||||
if not db_client:
|
||||
st.warning(
|
||||
"Database mode is not enabled. Set USE_DATABASE=true in your .env file to enable analytics."
|
||||
)
|
||||
st.info(
|
||||
"Analytics features require storing analysis results in PostgreSQL for historical tracking."
|
||||
)
|
||||
st.markdown("")
|
||||
st.markdown("""
|
||||
<div class="info-box">
|
||||
<strong>⚠️ Database Not Connected</strong><br>
|
||||
<span style="color: #94a3b8;">Set <code>USE_DATABASE=true</code> in your .env file to enable analytics tracking.</span>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
st.info("Analytics features require storing analysis results in PostgreSQL for historical tracking.")
|
||||
return
|
||||
|
||||
st.markdown("")
|
||||
|
||||
# Time range selector
|
||||
days = st.selectbox("Time Range", [7, 14, 30, 90], index=0)
|
||||
col1, col2, col3 = st.columns([1, 2, 1])
|
||||
with col1:
|
||||
days = st.selectbox("Time Range", [7, 14, 30, 90], index=0, format_func=lambda x: f"Last {x} days")
|
||||
|
||||
try:
|
||||
analytics = db_client.get_analytics(days=days)
|
||||
@@ -233,8 +572,9 @@ def render_analytics():
|
||||
st.info("No analytics data available yet. Run some analyses first!")
|
||||
return
|
||||
|
||||
st.markdown("")
|
||||
|
||||
# Summary metrics
|
||||
st.subheader("Summary")
|
||||
col1, col2, col3 = st.columns(3)
|
||||
|
||||
with col1:
|
||||
@@ -249,6 +589,8 @@ def render_analytics():
|
||||
types = len(analytics.get("by_type", {}))
|
||||
st.metric("Analysis Types", types)
|
||||
|
||||
st.markdown("")
|
||||
|
||||
# Charts
|
||||
col1, col2 = st.columns(2)
|
||||
|
||||
@@ -256,10 +598,17 @@ def render_analytics():
|
||||
by_company = analytics.get("by_company", {})
|
||||
if by_company:
|
||||
df = pd.DataFrame(
|
||||
[{"Company": k, "Count": v} for k, v in by_company.items()]
|
||||
[{"Company": k.upper(), "Count": v} for k, v in by_company.items()]
|
||||
)
|
||||
fig = px.pie(
|
||||
df, values="Count", names="Company", title="Analyses by Company"
|
||||
df, values="Count", names="Company", title="Distribution by Company",
|
||||
hole=0.4,
|
||||
color_discrete_sequence=px.colors.sequential.Purp_r,
|
||||
)
|
||||
fig.update_layout(
|
||||
plot_bgcolor="rgba(0,0,0,0)",
|
||||
paper_bgcolor="rgba(0,0,0,0)",
|
||||
font_color="#94a3b8",
|
||||
)
|
||||
st.plotly_chart(fig, use_container_width=True)
|
||||
|
||||
@@ -269,19 +618,29 @@ def render_analytics():
|
||||
df = pd.DataFrame(
|
||||
[{"Type": k, "Count": v} for k, v in by_type.items()]
|
||||
)
|
||||
fig = px.bar(df, x="Type", y="Count", title="Analyses by Type")
|
||||
fig = px.bar(df, x="Type", y="Count", title="Analysis Types",
|
||||
color_discrete_sequence=["#6366f1"])
|
||||
fig.update_layout(
|
||||
plot_bgcolor="rgba(0,0,0,0)",
|
||||
paper_bgcolor="rgba(0,0,0,0)",
|
||||
font_color="#94a3b8",
|
||||
xaxis=dict(showgrid=False),
|
||||
yaxis=dict(showgrid=True, gridcolor="rgba(99, 102, 241, 0.1)"),
|
||||
)
|
||||
st.plotly_chart(fig, use_container_width=True)
|
||||
|
||||
st.markdown("")
|
||||
|
||||
# Recent messages
|
||||
st.subheader("Recent Analyses")
|
||||
st.markdown('<p class="section-header">Recent Analyses</p>', unsafe_allow_html=True)
|
||||
messages = db_client.get_messages(limit=10)
|
||||
|
||||
if messages:
|
||||
for msg in messages:
|
||||
with st.expander(
|
||||
f"{msg.get('company_name', 'Unknown')} - {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
|
||||
f"📄 {msg.get('company_name', 'Unknown').upper()} — {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
|
||||
):
|
||||
st.markdown(f"**Model:** {msg.get('model', 'N/A')}")
|
||||
st.markdown(f"**Model:** `{msg.get('model', 'N/A')}`")
|
||||
if msg.get("response"):
|
||||
st.markdown(msg["response"][:500] + "...")
|
||||
|
||||
@@ -291,70 +650,127 @@ def render_analytics():
|
||||
|
||||
def render_about():
|
||||
"""Render about page."""
|
||||
st.header("About SPARC")
|
||||
st.markdown('<p class="section-header">About SPARC</p>', unsafe_allow_html=True)
|
||||
|
||||
st.markdown(
|
||||
"""
|
||||
**SPARC** (Semiconductor Patent & Analytics Report Core) is a patent analysis
|
||||
system that estimates company performance by analyzing their patent portfolios
|
||||
using LLM-powered insights.
|
||||
col1, col2 = st.columns([2, 1])
|
||||
|
||||
### Features
|
||||
with col1:
|
||||
st.markdown("""
|
||||
**SPARC** (Semiconductor Patent & Analytics Report Core) is an AI-powered patent analysis
|
||||
platform that evaluates company performance by analyzing their patent portfolios
|
||||
with cutting-edge language models.
|
||||
""")
|
||||
|
||||
- **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
|
||||
- **Intelligent Parsing**: Extracts key sections from patent PDFs
|
||||
- **AI Analysis**: Uses Claude 3.5 Sonnet for deep analysis
|
||||
- **Batch Processing**: Analyze multiple companies concurrently
|
||||
- **REST API**: FastAPI web service for integration
|
||||
- **Analytics**: Track and visualize analysis history
|
||||
st.markdown("")
|
||||
st.markdown("**Key Features**")
|
||||
|
||||
### Technology Stack
|
||||
features = [
|
||||
("🔍", "Patent Retrieval", "Automated collection via SerpAPI's Google Patents"),
|
||||
("📄", "Intelligent Parsing", "Extracts key sections from patent documents"),
|
||||
("🤖", "AI Analysis", "Deep analysis powered by Claude 3.5 Sonnet"),
|
||||
("⚡", "Batch Processing", "Analyze multiple companies concurrently"),
|
||||
("🌐", "REST API", "FastAPI web service for seamless integration"),
|
||||
("📊", "Analytics", "Track and visualize historical analysis data"),
|
||||
]
|
||||
|
||||
- **Backend**: Python, FastAPI
|
||||
- **AI**: Claude 3.5 Sonnet via OpenRouter
|
||||
- **Database**: PostgreSQL
|
||||
- **Dashboard**: Streamlit, Plotly
|
||||
- **Patent Data**: SerpAPI Google Patents
|
||||
for icon, title, desc in features:
|
||||
st.markdown(f"""
|
||||
<div class="feature-item">
|
||||
<span class="feature-icon">{icon}</span>
|
||||
<div>
|
||||
<strong>{title}</strong><br>
|
||||
<span style="color: #94a3b8; font-size: 0.875rem;">{desc}</span>
|
||||
</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
### Links
|
||||
with col2:
|
||||
st.markdown("**Technology Stack**")
|
||||
st.markdown("""
|
||||
<div class="info-box">
|
||||
<div style="display: grid; gap: 0.5rem;">
|
||||
<div><span style="color: #6366f1;">Backend</span><br><span style="color: #94a3b8;">Python, FastAPI</span></div>
|
||||
<div><span style="color: #6366f1;">AI Model</span><br><span style="color: #94a3b8;">Claude 3.5 Sonnet</span></div>
|
||||
<div><span style="color: #6366f1;">Database</span><br><span style="color: #94a3b8;">PostgreSQL</span></div>
|
||||
<div><span style="color: #6366f1;">Dashboard</span><br><span style="color: #94a3b8;">Streamlit, Plotly</span></div>
|
||||
<div><span style="color: #6366f1;">Data Source</span><br><span style="color: #94a3b8;">SerpAPI Patents</span></div>
|
||||
</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
- API Docs: `http://localhost:8000/docs`
|
||||
- Health Check: `http://localhost:8000/health`
|
||||
"""
|
||||
)
|
||||
st.markdown("")
|
||||
st.markdown("**API Endpoints**")
|
||||
st.code("http://localhost:8000/docs", language=None)
|
||||
st.code("http://localhost:8000/health", language=None)
|
||||
|
||||
st.markdown("")
|
||||
st.markdown("")
|
||||
|
||||
# System status
|
||||
st.subheader("System Status")
|
||||
st.markdown('<p class="section-header">System Status</p>', unsafe_allow_html=True)
|
||||
|
||||
col1, col2 = st.columns(2)
|
||||
col1, col2, col3 = st.columns(3)
|
||||
|
||||
with col1:
|
||||
db_client = get_db_client()
|
||||
if db_client:
|
||||
st.success("Database: Connected")
|
||||
st.markdown("""
|
||||
<div class="metric-card">
|
||||
<div style="color: #10b981; font-size: 1.5rem;">●</div>
|
||||
<div class="metric-label">Database</div>
|
||||
<div style="color: #10b981; font-weight: 600;">Connected</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
else:
|
||||
st.warning("Database: Not configured")
|
||||
st.markdown("""
|
||||
<div class="metric-card">
|
||||
<div style="color: #f59e0b; font-size: 1.5rem;">●</div>
|
||||
<div class="metric-label">Database</div>
|
||||
<div style="color: #f59e0b; font-weight: 600;">Not Configured</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
with col2:
|
||||
analyzer = get_analyzer()
|
||||
if analyzer:
|
||||
st.success("Analyzer: Ready")
|
||||
st.markdown("""
|
||||
<div class="metric-card">
|
||||
<div style="color: #10b981; font-size: 1.5rem;">●</div>
|
||||
<div class="metric-label">Analyzer</div>
|
||||
<div style="color: #10b981; font-weight: 600;">Ready</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
else:
|
||||
st.error("Analyzer: Not initialized")
|
||||
st.markdown("""
|
||||
<div class="metric-card">
|
||||
<div style="color: #ef4444; font-size: 1.5rem;">●</div>
|
||||
<div class="metric-label">Analyzer</div>
|
||||
<div style="color: #ef4444; font-weight: 600;">Not Initialized</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
with col3:
|
||||
st.markdown("""
|
||||
<div class="metric-card">
|
||||
<div style="color: #10b981; font-size: 1.5rem;">●</div>
|
||||
<div class="metric-label">Dashboard</div>
|
||||
<div style="color: #10b981; font-weight: 600;">Online</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
|
||||
def main():
|
||||
"""Main dashboard entry point."""
|
||||
render_header()
|
||||
page = render_sidebar()
|
||||
tabs = render_navigation()
|
||||
|
||||
if page == "Company Analysis":
|
||||
with tabs[0]:
|
||||
render_company_analysis()
|
||||
elif page == "Batch Analysis":
|
||||
with tabs[1]:
|
||||
render_batch_analysis()
|
||||
elif page == "Analytics":
|
||||
with tabs[2]:
|
||||
render_analytics()
|
||||
elif page == "About":
|
||||
with tabs[3]:
|
||||
render_about()
|
||||
|
||||
|
||||
|
||||
+42
-8
@@ -12,25 +12,59 @@ services:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
interval: 10s
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: sparc-app
|
||||
init-db:
|
||||
build: .
|
||||
container_name: sparc-init-db
|
||||
command: python scripts/init_database.py
|
||||
environment:
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||
USE_DATABASE: "true"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
restart: "no"
|
||||
|
||||
api:
|
||||
build: .
|
||||
container_name: sparc-api
|
||||
command: uvicorn SPARC.api:app --host 0.0.0.0 --port 8000
|
||||
environment:
|
||||
USE_DATABASE: true
|
||||
API_KEY: ${API_KEY}
|
||||
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||
USE_DATABASE: "true"
|
||||
ports:
|
||||
- "8000:8000"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
init-db:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- .:/app
|
||||
- ./patents:/app/patents
|
||||
restart: unless-stopped
|
||||
|
||||
dashboard:
|
||||
build: .
|
||||
container_name: sparc-dashboard
|
||||
command: streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
||||
environment:
|
||||
API_KEY: ${API_KEY}
|
||||
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||
USE_DATABASE: "true"
|
||||
ports:
|
||||
- "8501:8501"
|
||||
depends_on:
|
||||
- api
|
||||
volumes:
|
||||
- ./patents:/app/patents
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
|
||||
+62
-106
@@ -55,28 +55,25 @@ USE_DATABASE=true
|
||||
## Step 2: Start Services with Docker Compose
|
||||
|
||||
```bash
|
||||
# Start PostgreSQL database
|
||||
docker-compose up -d postgres
|
||||
# Start all services (PostgreSQL, API, and Dashboard)
|
||||
docker-compose up -d
|
||||
|
||||
# Wait for postgres to be healthy (check with)
|
||||
# Check status
|
||||
docker-compose ps
|
||||
|
||||
# You should see sparc-postgres with status "healthy"
|
||||
# You should see:
|
||||
# - sparc-postgres (healthy)
|
||||
# - sparc-api (running on port 8000)
|
||||
# - sparc-dashboard (running on port 8501)
|
||||
```
|
||||
|
||||
The database is automatically initialized by the `init-db` service.
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Initialize the Database
|
||||
## Step 3: Database Schema
|
||||
|
||||
```bash
|
||||
# Option A: If running locally with Python
|
||||
python scripts/init_database.py
|
||||
|
||||
# Option B: If using Docker, run inside container
|
||||
docker-compose run --rm sparc-app python scripts/init_database.py
|
||||
```
|
||||
|
||||
This creates the `llm_messages` table with the following schema:
|
||||
The `init-db` service automatically creates the `llm_messages` table with the following schema:
|
||||
|
||||
| Column | Type | Purpose |
|
||||
|--------|------|---------|
|
||||
@@ -95,9 +92,30 @@ This creates the `llm_messages` table with the following schema:
|
||||
|
||||
## Step 4: Run the Services
|
||||
|
||||
### Option A: Run Locally (Development)
|
||||
### Option A: Run with Docker Compose (Recommended)
|
||||
|
||||
All services are started automatically with `docker-compose up -d` from Step 2.
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
```
|
||||
|
||||
### Option B: Run Locally (Development)
|
||||
|
||||
If you prefer running services locally without Docker:
|
||||
|
||||
```bash
|
||||
# Start PostgreSQL with Docker
|
||||
docker-compose up -d postgres
|
||||
|
||||
# Wait for database to be healthy, then initialize
|
||||
python scripts/init_database.py
|
||||
|
||||
# Terminal 1: Start FastAPI backend
|
||||
uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
||||
|
||||
@@ -105,14 +123,6 @@ uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
||||
streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
||||
```
|
||||
|
||||
### Option B: Run with Docker (Production)
|
||||
|
||||
See [Production Docker Compose](#production-docker-compose) section below for a complete `docker-compose.prod.yml` configuration.
|
||||
|
||||
```bash
|
||||
docker-compose -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Verify Deployment
|
||||
@@ -256,97 +266,41 @@ postgresql://postgres:postgres@localhost:5432/sparc
|
||||
|
||||
---
|
||||
|
||||
## Production Docker Compose
|
||||
## Docker Compose Services
|
||||
|
||||
Create a `docker-compose.prod.yml` file for full production deployment:
|
||||
The `docker-compose.yml` includes all services needed for production:
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
| Service | Container | Port | Description |
|
||||
|---------|-----------|------|-------------|
|
||||
| `postgres` | sparc-postgres | 5432 | PostgreSQL database |
|
||||
| `init-db` | sparc-init-db | - | One-time database initialization |
|
||||
| `api` | sparc-api | 8000 | FastAPI REST API |
|
||||
| `dashboard` | sparc-dashboard | 8501 | Streamlit web UI |
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: sparc-postgres
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: sparc
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
api:
|
||||
build: .
|
||||
container_name: sparc-api
|
||||
command: uvicorn SPARC.api:app --host 0.0.0.0 --port 8000
|
||||
environment:
|
||||
- API_KEY=${API_KEY}
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||
- DATABASE_URL=postgresql://postgres:postgres@postgres:5432/sparc
|
||||
- USE_DATABASE=true
|
||||
ports:
|
||||
- "8000:8000"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./patents:/app/patents
|
||||
restart: unless-stopped
|
||||
|
||||
dashboard:
|
||||
build: .
|
||||
container_name: sparc-dashboard
|
||||
command: streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
||||
environment:
|
||||
- API_KEY=${API_KEY}
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||
- DATABASE_URL=postgresql://postgres:postgres@postgres:5432/sparc
|
||||
- USE_DATABASE=true
|
||||
ports:
|
||||
- "8501:8501"
|
||||
depends_on:
|
||||
- api
|
||||
volumes:
|
||||
- ./patents:/app/patents
|
||||
restart: unless-stopped
|
||||
|
||||
init-db:
|
||||
build: .
|
||||
container_name: sparc-init-db
|
||||
command: python scripts/init_database.py
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:postgres@postgres:5432/sparc
|
||||
- USE_DATABASE=true
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
restart: "no"
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
```
|
||||
|
||||
### Deploy with Production Compose
|
||||
### Common Docker Compose Commands
|
||||
|
||||
```bash
|
||||
# Start all services
|
||||
docker-compose -f docker-compose.prod.yml up -d
|
||||
docker-compose up -d
|
||||
|
||||
# Start with rebuild (after code changes)
|
||||
docker-compose up -d --build
|
||||
|
||||
# View logs
|
||||
docker-compose -f docker-compose.prod.yml logs -f
|
||||
docker-compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
|
||||
# Stop all services
|
||||
docker-compose -f docker-compose.prod.yml down
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes (WARNING: deletes data)
|
||||
docker-compose -f docker-compose.prod.yml down -v
|
||||
docker-compose down -v
|
||||
|
||||
# Restart a specific service
|
||||
docker-compose restart api
|
||||
```
|
||||
|
||||
---
|
||||
@@ -417,7 +371,12 @@ docker-compose logs -f dashboard
|
||||
## Quick Reference
|
||||
|
||||
```bash
|
||||
# Development setup
|
||||
# Docker setup (recommended)
|
||||
cp .env.example .env
|
||||
# Edit .env with API keys
|
||||
docker-compose up -d
|
||||
|
||||
# Local development setup
|
||||
cp .env.example .env
|
||||
# Edit .env with API keys
|
||||
docker-compose up -d postgres
|
||||
@@ -425,9 +384,6 @@ python scripts/init_database.py
|
||||
uvicorn SPARC.api:app --reload &
|
||||
streamlit run dashboard.py
|
||||
|
||||
# Production setup
|
||||
docker-compose -f docker-compose.prod.yml up -d
|
||||
|
||||
# Check status
|
||||
curl http://localhost:8000/health
|
||||
open http://localhost:8501
|
||||
|
||||
@@ -20,6 +20,14 @@
|
||||
packages = [
|
||||
python
|
||||
pkgs.python311Packages.virtualenv # gives `virtualenv` tool
|
||||
pkgs.zlib
|
||||
pkgs.stdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
# Required for numpy and other C extension packages
|
||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
||||
pkgs.zlib
|
||||
pkgs.stdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
|
||||
@@ -11,4 +11,5 @@ uvicorn[standard]
|
||||
httpx
|
||||
streamlit
|
||||
plotly
|
||||
numpy
|
||||
pandas
|
||||
|
||||
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Estimate token usage per company portfolio for SPARC analysis."""
|
||||
|
||||
import tiktoken
|
||||
from typing import Dict, List, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TokenEstimate:
    """Result record describing the estimated token usage of one portfolio.

    Instances are produced by ``TokenEstimator``; all counts are estimates,
    not measured usage.
    """

    # Company the estimate was computed for.
    company_name: str
    # Number of patents included in the estimate.
    patent_count: int
    # Tokens counted across all prompts that would be sent.
    prompt_tokens: int
    # Upper-bound completion tokens assumed for the responses.
    estimated_completion_tokens: int
    # prompt_tokens + estimated_completion_tokens.
    total_tokens: int
    # Estimated cost in US dollars for the input and output tokens combined.
    cost_estimate_usd: float
|
||||
|
||||
|
||||
class TokenEstimator:
    """Estimate token usage and cost for SPARC patent analysis.

    Prompts are rebuilt locally and counted with a tiktoken encoding; the
    completion side is assumed to use the full ``max_tokens`` budget, so the
    resulting figures are upper-bound style estimates rather than exact
    usage.
    """

    # Claude 3.5 Sonnet pricing via OpenRouter (per 1M tokens)
    INPUT_COST_PER_1M = 3.00  # $3.00 per 1M input tokens
    OUTPUT_COST_PER_1M = 15.00  # $15.00 per 1M output tokens

    # Estimated output tokens based on max_tokens settings
    SINGLE_PATENT_MAX_OUTPUT = 1024
    PORTFOLIO_MAX_OUTPUT = 2048

    # Patent-like English filler used by estimate_from_sample(). Realistic
    # prose matters: a run of one repeated character is merged by BPE into
    # far fewer tokens than real text of the same length, which would make
    # the sample estimate too low.
    _SAMPLE_TEXT = (
        "ABSTRACT: A system and method for processing input data using a "
        "plurality of interconnected compute nodes. CLAIMS: 1. A "
        "computer-implemented method comprising: receiving, at a processor, "
        "a request from a client device; determining, based on the request, "
        "a set of parameters; and transmitting a response over a network. "
        "SUMMARY: The present disclosure relates to improvements in the "
        "efficiency and reliability of distributed computing systems. "
    )

    def __init__(self):
        # cl100k_base is an OpenAI encoding used here only as an
        # approximation; it is not Claude's own tokenizer, so counts are
        # estimates.
        self.encoder = tiktoken.get_encoding("cl100k_base")

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens ``self.encoder`` produces for *text*."""
        return len(self.encoder.encode(text))

    def build_single_patent_prompt(self, patent_content: str, company_name: str) -> str:
        """Build the prompt for a single-patent analysis.

        The original comment states this matches the prompt in llm.py; that
        cannot be verified from this file.
        """
        return f"""You are a patent analyst evaluating {company_name}'s innovation strategy.

Analyze the following patent content and provide insights on:
1. Innovation quality and novelty
2. Technical complexity and defensibility
3. Market potential and commercial viability
4. Strategic positioning relative to industry trends

Patent Content:
{patent_content}

Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""

    def build_portfolio_prompt(self, patents_data: List[Dict[str, str]], company_name: str) -> str:
        """Build the prompt for a whole-portfolio analysis.

        The original comment states this matches the prompt in llm.py; that
        cannot be verified from this file.

        Args:
            patents_data: List of dicts with 'patent_id' and 'content' keys.
            company_name: Name of the company being analyzed.

        Raises:
            KeyError: If an entry lacks 'patent_id' or 'content'.
        """
        portfolio_summary = [
            f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
            for idx, patent in enumerate(patents_data, 1)
        ]
        combined_content = "\n\n---\n\n".join(portfolio_summary)

        return f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.

You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:

1. Innovation Trends: What technology areas are they focusing on?
2. Strategic Direction: What does this reveal about their business strategy?
3. Competitive Position: How defensible are these innovations?
4. Market Outlook: What market opportunities do these patents target?
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?

Patent Portfolio:
{combined_content}

Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""

    def _estimate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
        """Return the USD cost for the given input and output token counts."""
        input_cost = (prompt_tokens / 1_000_000) * self.INPUT_COST_PER_1M
        output_cost = (completion_tokens / 1_000_000) * self.OUTPUT_COST_PER_1M
        return input_cost + output_cost

    @classmethod
    def _sample_patent_content(cls, avg_patent_chars: int) -> str:
        """Return exactly *avg_patent_chars* characters of filler prose."""
        filler = cls._SAMPLE_TEXT
        # Ceiling division: repeat enough times to cover the requested
        # length, then trim to the exact size.
        repeats = -(-avg_patent_chars // len(filler))
        return (filler * repeats)[:avg_patent_chars]

    def estimate_portfolio(
        self,
        company_name: str,
        patents_data: List[Dict[str, str]],
        include_individual_patents: bool = False
    ) -> "TokenEstimate":
        """Estimate tokens for a company portfolio analysis.

        Args:
            company_name: Name of the company
            patents_data: List of dicts with 'patent_id' and 'content' keys
            include_individual_patents: If True, also count individual patent analysis calls

        Returns:
            A TokenEstimate whose completion count assumes every call uses
            its full max-output budget.

        Raises:
            KeyError: If an entry lacks 'patent_id' or 'content'.
        """
        # Portfolio analysis tokens
        portfolio_prompt = self.build_portfolio_prompt(patents_data, company_name)
        prompt_tokens = self.count_tokens(portfolio_prompt)
        completion_tokens = self.PORTFOLIO_MAX_OUTPUT

        # Optionally add one single-patent analysis call per patent
        if include_individual_patents:
            prompt_tokens += sum(
                self.count_tokens(
                    self.build_single_patent_prompt(patent['content'], company_name)
                )
                for patent in patents_data
            )
            completion_tokens += self.SINGLE_PATENT_MAX_OUTPUT * len(patents_data)

        return TokenEstimate(
            company_name=company_name,
            patent_count=len(patents_data),
            prompt_tokens=prompt_tokens,
            estimated_completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
            cost_estimate_usd=self._estimate_cost(prompt_tokens, completion_tokens)
        )

    def estimate_from_sample(
        self,
        company_name: str,
        patent_count: int = 10,
        avg_patent_chars: int = 5000
    ) -> "TokenEstimate":
        """Estimate tokens using sample/average patent sizes.

        Args:
            company_name: Name of the company
            patent_count: Number of patents (default 10, typical from SERP)
            avg_patent_chars: Average characters per minimized patent content

        Raises:
            ValueError: If patent_count or avg_patent_chars is negative.
        """
        if patent_count < 0:
            raise ValueError(f"patent_count must be >= 0, got {patent_count}")
        if avg_patent_chars < 0:
            raise ValueError(f"avg_patent_chars must be >= 0, got {avg_patent_chars}")

        # Generate sample patent data from prose-like filler (see _SAMPLE_TEXT)
        sample_content = self._sample_patent_content(avg_patent_chars)
        patents_data = [
            {"patent_id": f"US{10000000 + i}", "content": sample_content}
            for i in range(patent_count)
        ]

        return self.estimate_portfolio(company_name, patents_data)
|
||||
|
||||
|
||||
def main():
    """Run token estimation examples.

    Prints three demonstration reports to stdout: per-company estimates from
    synthetic sample data, how the estimate scales with portfolio size, and
    an estimate for two small hand-written patent records. Nothing is
    returned.
    """
    estimator = TokenEstimator()

    print("=" * 70)
    print("SPARC Token Usage Estimator")
    print("=" * 70)

    # Example 1: Estimate with sample data
    print("\n📊 Sample Estimates (10 patents, ~5000 chars each):\n")

    companies = ["Apple Inc.", "Microsoft Corporation", "Tesla Motors", "Google LLC"]

    # Running totals across all companies, reported after the loop.
    total_tokens = 0
    total_cost = 0.0

    for company in companies:
        estimate = estimator.estimate_from_sample(company, patent_count=10, avg_patent_chars=5000)
        print(f" {company}:")
        print(f" Patents: {estimate.patent_count}")
        print(f" Prompt tokens: {estimate.prompt_tokens:,}")
        print(f" Est. completion tokens: {estimate.estimated_completion_tokens:,}")
        print(f" Total tokens: {estimate.total_tokens:,}")
        print(f" Est. cost: ${estimate.cost_estimate_usd:.4f}")
        print()

        total_tokens += estimate.total_tokens
        total_cost += estimate.cost_estimate_usd

    print("-" * 70)
    print(f" TOTAL for {len(companies)} companies:")
    print(f" Total tokens: {total_tokens:,}")
    print(f" Total est. cost: ${total_cost:.4f}")

    # Example 2: Different portfolio sizes
    print("\n" + "=" * 70)
    print("📈 Token Scaling by Portfolio Size:")
    print("=" * 70 + "\n")

    # avg_patent_chars is left at the estimate_from_sample default here.
    for patent_count in [5, 10, 15, 20]:
        estimate = estimator.estimate_from_sample("Sample Corp", patent_count=patent_count)
        print(f" {patent_count} patents: {estimate.prompt_tokens:,} prompt tokens, ${estimate.cost_estimate_usd:.4f}")

    # Example 3: With actual patent content (simulated)
    print("\n" + "=" * 70)
    print("📝 Example with Real Patent Structure:")
    print("=" * 70 + "\n")

    # Hand-written records in the {'patent_id', 'content'} shape that
    # estimate_portfolio expects.
    sample_patents = [
        {
            "patent_id": "US11234567",
            "content": """ABSTRACT: A method for machine learning optimization using gradient descent.

CLAIMS:
1. A computer-implemented method comprising:
receiving input data;
processing the input data through a neural network;
optimizing weights using backpropagation.

SUMMARY: This invention relates to improvements in neural network training efficiency."""
        },
        {
            "patent_id": "US11234568",
            "content": """ABSTRACT: System for distributed computing in cloud environments.

CLAIMS:
1. A distributed system comprising:
a plurality of compute nodes;
a load balancer;
a message queue for task distribution.

SUMMARY: The present disclosure improves cloud computing resource allocation."""
        }
    ]

    # Portfolio-level estimate only; individual-patent calls are not included.
    estimate = estimator.estimate_portfolio("Tech Corp", sample_patents)
    print(f" Company: {estimate.company_name}")
    print(f" Patents analyzed: {estimate.patent_count}")
    print(f" Prompt tokens: {estimate.prompt_tokens:,}")
    print(f" Est. completion: {estimate.estimated_completion_tokens:,}")
    print(f" Total: {estimate.total_tokens:,}")
    print(f" Est. cost: ${estimate.cost_estimate_usd:.4f}")


# Run the examples only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user