Compare commits
5 Commits
490850d7a6
...
3424384088
| Author | SHA1 | Date | |
|---|---|---|---|
| | 3424384088 | | |
| | 5141d9dd47 | | |
| | 4e419166e8 | | |
| | 7eb72ab549 | | |
| | d371ceeec8 | | |
+15
-2
@@ -1,12 +1,25 @@
|
|||||||
FROM python:3.14-alpine3.23
|
FROM python:3.12-slim
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies for pdfplumber and psycopg2
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
gcc \
|
||||||
|
libpq-dev \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
CMD ["python3", "main.py"]
|
# Create patents directory for PDF storage
|
||||||
|
RUN mkdir -p /app/patents
|
||||||
|
|
||||||
|
# Expose ports for API and Dashboard
|
||||||
|
EXPOSE 8000 8501
|
||||||
|
|
||||||
|
# Default command runs the API (can be overridden in docker-compose)
|
||||||
|
CMD ["uvicorn", "SPARC.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,25 @@ SPARC/
|
|||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
### NixOS (Recommended)
|
### Docker (Recommended)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone and configure
|
||||||
|
git clone <repository-url>
|
||||||
|
cd SPARC
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your API keys
|
||||||
|
|
||||||
|
# Start all services (API, Dashboard, PostgreSQL)
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Access the services
|
||||||
|
# - API: http://localhost:8000
|
||||||
|
# - Dashboard: http://localhost:8501
|
||||||
|
# - API Docs: http://localhost:8000/docs
|
||||||
|
```
|
||||||
|
|
||||||
|
### NixOS
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
nix develop
|
nix develop
|
||||||
@@ -262,4 +280,4 @@ For open source projects, say how it is licensed.
|
|||||||
|
|
||||||
Core functionality complete. Ready for production use with API keys configured.
|
Core functionality complete. Ready for production use with API keys configured.
|
||||||
|
|
||||||
Next steps: API wrapper, containerization, and multi-company support.
|
All major features implemented: REST API, Streamlit dashboard, Docker containerization, database storage, and multi-company batch processing.
|
||||||
|
|||||||
+20
-2
@@ -8,6 +8,21 @@ from SPARC.types import Patents, Patent
|
|||||||
|
|
||||||
class SERP:
|
class SERP:
|
||||||
def query(company: str) -> Patents:
|
def query(company: str) -> Patents:
|
||||||
|
"""Query Google Patents for a company's recent patents.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
company: Name of the company to search for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Patents object containing list of patents with PDF links
|
||||||
|
|
||||||
|
Note:
|
||||||
|
Patents without PDF download links are skipped. This occurs when
|
||||||
|
Google Patents doesn't have a PDF available for a particular patent
|
||||||
|
(e.g., recently filed patents, certain international patents, or
|
||||||
|
patents with restricted access). The returned count may be lower
|
||||||
|
than the requested number of results.
|
||||||
|
"""
|
||||||
# Make API call
|
# Make API call
|
||||||
params = {
|
params = {
|
||||||
"engine": "google_patents",
|
"engine": "google_patents",
|
||||||
@@ -18,11 +33,14 @@ class SERP:
|
|||||||
"api_key": config.api_key,
|
"api_key": config.api_key,
|
||||||
}
|
}
|
||||||
search = serpapi.search(params)
|
search = serpapi.search(params)
|
||||||
# Convert data into a list of publicationID
|
# Convert results to Patent objects, skipping any without PDF links
|
||||||
patent_ids = []
|
patent_ids = []
|
||||||
list_of_patents = search["organic_results"]
|
list_of_patents = search["organic_results"]
|
||||||
for patent in list_of_patents:
|
for patent in list_of_patents:
|
||||||
patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=patent["pdf"], summary=None))
|
pdf_link = patent.get("pdf")
|
||||||
|
if pdf_link:
|
||||||
|
patent_ids.append(Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None))
|
||||||
|
# Patents without PDF links are skipped (see docstring for details)
|
||||||
|
|
||||||
return Patents(patents=patent_ids)
|
return Patents(patents=patent_ids)
|
||||||
|
|
||||||
|
|||||||
+517
-101
@@ -17,11 +17,304 @@ from SPARC import config
|
|||||||
|
|
||||||
st.set_page_config(
|
st.set_page_config(
|
||||||
page_title="SPARC Dashboard",
|
page_title="SPARC Dashboard",
|
||||||
page_icon="📊",
|
page_icon="⚡",
|
||||||
layout="wide",
|
layout="wide",
|
||||||
initial_sidebar_state="expanded",
|
initial_sidebar_state="collapsed",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Modern CSS styling
|
||||||
|
st.markdown("""
|
||||||
|
<style>
|
||||||
|
/* Hide default Streamlit elements */
|
||||||
|
#MainMenu {visibility: hidden;}
|
||||||
|
footer {visibility: hidden;}
|
||||||
|
header {visibility: hidden;}
|
||||||
|
|
||||||
|
/* Root variables for theming */
|
||||||
|
:root {
|
||||||
|
--primary: #6366f1;
|
||||||
|
--primary-dark: #4f46e5;
|
||||||
|
--secondary: #0ea5e9;
|
||||||
|
--success: #10b981;
|
||||||
|
--warning: #f59e0b;
|
||||||
|
--error: #ef4444;
|
||||||
|
--bg-dark: #0f172a;
|
||||||
|
--bg-card: #1e293b;
|
||||||
|
--bg-card-hover: #334155;
|
||||||
|
--text-primary: #f8fafc;
|
||||||
|
--text-secondary: #94a3b8;
|
||||||
|
--border: #334155;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Main app background */
|
||||||
|
.stApp {
|
||||||
|
background: linear-gradient(135deg, #0f172a 0%, #1e1b4b 100%);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Top navigation bar */
|
||||||
|
.nav-container {
|
||||||
|
background: rgba(30, 41, 59, 0.8);
|
||||||
|
backdrop-filter: blur(12px);
|
||||||
|
border-bottom: 1px solid rgba(99, 102, 241, 0.2);
|
||||||
|
padding: 1rem 2rem;
|
||||||
|
margin: -1rem -1rem 2rem -1rem;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-brand {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-brand h1 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
font-weight: 700;
|
||||||
|
background: linear-gradient(135deg, #6366f1, #0ea5e9);
|
||||||
|
-webkit-background-clip: text;
|
||||||
|
-webkit-text-fill-color: transparent;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-brand span {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Card styling */
|
||||||
|
.modern-card {
|
||||||
|
background: rgba(30, 41, 59, 0.6);
|
||||||
|
backdrop-filter: blur(8px);
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.15);
|
||||||
|
border-radius: 16px;
|
||||||
|
padding: 1.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modern-card:hover {
|
||||||
|
border-color: rgba(99, 102, 241, 0.4);
|
||||||
|
box-shadow: 0 8px 32px rgba(99, 102, 241, 0.15);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Metric cards */
|
||||||
|
.metric-card {
|
||||||
|
background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(14, 165, 233, 0.1));
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.2);
|
||||||
|
border-radius: 12px;
|
||||||
|
padding: 1.25rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metric-value {
|
||||||
|
font-size: 2rem;
|
||||||
|
font-weight: 700;
|
||||||
|
background: linear-gradient(135deg, #6366f1, #0ea5e9);
|
||||||
|
-webkit-background-clip: text;
|
||||||
|
-webkit-text-fill-color: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metric-label {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.05em;
|
||||||
|
margin-top: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Section headers */
|
||||||
|
.section-header {
|
||||||
|
font-size: 1.25rem;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
border-bottom: 2px solid rgba(99, 102, 241, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Input fields */
|
||||||
|
.stTextInput > div > div > input,
|
||||||
|
.stTextArea > div > div > textarea {
|
||||||
|
background: rgba(30, 41, 59, 0.8) !important;
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.3) !important;
|
||||||
|
border-radius: 10px !important;
|
||||||
|
color: var(--text-primary) !important;
|
||||||
|
padding: 0.75rem 1rem !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stTextInput > div > div > input:focus,
|
||||||
|
.stTextArea > div > div > textarea:focus {
|
||||||
|
border-color: var(--primary) !important;
|
||||||
|
box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Buttons */
|
||||||
|
.stButton > button {
|
||||||
|
background: linear-gradient(135deg, #6366f1, #4f46e5) !important;
|
||||||
|
color: white !important;
|
||||||
|
border: none !important;
|
||||||
|
border-radius: 10px !important;
|
||||||
|
padding: 0.75rem 1.5rem !important;
|
||||||
|
font-weight: 600 !important;
|
||||||
|
transition: all 0.3s ease !important;
|
||||||
|
box-shadow: 0 4px 14px rgba(99, 102, 241, 0.3) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stButton > button:hover {
|
||||||
|
transform: translateY(-2px) !important;
|
||||||
|
box-shadow: 0 6px 20px rgba(99, 102, 241, 0.4) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tabs styling */
|
||||||
|
.stTabs [data-baseweb="tab-list"] {
|
||||||
|
background: rgba(30, 41, 59, 0.6);
|
||||||
|
border-radius: 12px;
|
||||||
|
padding: 0.5rem;
|
||||||
|
gap: 0.5rem;
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.15);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stTabs [data-baseweb="tab"] {
|
||||||
|
background: transparent;
|
||||||
|
border-radius: 8px;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
padding: 0.75rem 1.5rem;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stTabs [aria-selected="true"] {
|
||||||
|
background: linear-gradient(135deg, #6366f1, #4f46e5) !important;
|
||||||
|
color: white !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stTabs [data-baseweb="tab-border"] {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stTabs [data-baseweb="tab-highlight"] {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Expander styling */
|
||||||
|
.streamlit-expanderHeader {
|
||||||
|
background: rgba(30, 41, 59, 0.6) !important;
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.15) !important;
|
||||||
|
border-radius: 10px !important;
|
||||||
|
color: var(--text-primary) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.streamlit-expanderContent {
|
||||||
|
background: rgba(30, 41, 59, 0.4) !important;
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.1) !important;
|
||||||
|
border-top: none !important;
|
||||||
|
border-radius: 0 0 10px 10px !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Slider */
|
||||||
|
.stSlider > div > div > div {
|
||||||
|
background: var(--primary) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Select box */
|
||||||
|
.stSelectbox > div > div {
|
||||||
|
background: rgba(30, 41, 59, 0.8) !important;
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.3) !important;
|
||||||
|
border-radius: 10px !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Progress bar */
|
||||||
|
.stProgress > div > div > div {
|
||||||
|
background: linear-gradient(90deg, #6366f1, #0ea5e9) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Alerts */
|
||||||
|
.stAlert {
|
||||||
|
border-radius: 10px !important;
|
||||||
|
border: none !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Metrics */
|
||||||
|
[data-testid="stMetricValue"] {
|
||||||
|
background: linear-gradient(135deg, #6366f1, #0ea5e9);
|
||||||
|
-webkit-background-clip: text;
|
||||||
|
-webkit-text-fill-color: transparent;
|
||||||
|
font-weight: 700;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-testid="stMetricLabel"] {
|
||||||
|
color: var(--text-secondary) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Plotly charts */
|
||||||
|
.js-plotly-plot {
|
||||||
|
border-radius: 12px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Status badges */
|
||||||
|
.status-badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.25rem 0.75rem;
|
||||||
|
border-radius: 9999px;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 600;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-success {
|
||||||
|
background: rgba(16, 185, 129, 0.2);
|
||||||
|
color: #10b981;
|
||||||
|
border: 1px solid rgba(16, 185, 129, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-warning {
|
||||||
|
background: rgba(245, 158, 11, 0.2);
|
||||||
|
color: #f59e0b;
|
||||||
|
border: 1px solid rgba(245, 158, 11, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-error {
|
||||||
|
background: rgba(239, 68, 68, 0.2);
|
||||||
|
color: #ef4444;
|
||||||
|
border: 1px solid rgba(239, 68, 68, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dividers */
|
||||||
|
hr {
|
||||||
|
border: none;
|
||||||
|
border-top: 1px solid rgba(99, 102, 241, 0.2);
|
||||||
|
margin: 1.5rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Info boxes */
|
||||||
|
.info-box {
|
||||||
|
background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(14, 165, 233, 0.05));
|
||||||
|
border: 1px solid rgba(99, 102, 241, 0.2);
|
||||||
|
border-radius: 12px;
|
||||||
|
padding: 1rem 1.25rem;
|
||||||
|
margin: 1rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Feature list */
|
||||||
|
.feature-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-start;
|
||||||
|
gap: 0.75rem;
|
||||||
|
padding: 0.75rem 0;
|
||||||
|
border-bottom: 1px solid rgba(99, 102, 241, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-icon {
|
||||||
|
color: var(--primary);
|
||||||
|
font-size: 1.25rem;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
|
|
||||||
@st.cache_resource
|
@st.cache_resource
|
||||||
def get_analyzer():
|
def get_analyzer():
|
||||||
@@ -43,37 +336,44 @@ def get_db_client():
|
|||||||
|
|
||||||
|
|
||||||
def render_header():
|
def render_header():
|
||||||
"""Render the dashboard header."""
|
"""Render the modern dashboard header."""
|
||||||
st.title("SPARC Dashboard")
|
st.markdown("""
|
||||||
st.markdown("**Semiconductor Patent & Analytics Report Core**")
|
<div class="nav-container">
|
||||||
st.markdown("---")
|
<div class="nav-brand">
|
||||||
|
<h1>⚡ SPARC</h1>
|
||||||
|
<span>Semiconductor Patent Analytics</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
|
|
||||||
def render_sidebar():
|
def render_navigation():
|
||||||
"""Render the sidebar with navigation and controls."""
|
"""Render horizontal tab navigation at the top."""
|
||||||
st.sidebar.title("Navigation")
|
tabs = st.tabs(["🔍 Company Analysis", "📦 Batch Analysis", "📊 Analytics", "ℹ️ About"])
|
||||||
page = st.sidebar.radio(
|
return tabs
|
||||||
"Select Page",
|
|
||||||
["Company Analysis", "Batch Analysis", "Analytics", "About"],
|
|
||||||
)
|
|
||||||
return page
|
|
||||||
|
|
||||||
|
|
||||||
def render_company_analysis():
|
def render_company_analysis():
|
||||||
"""Render single company analysis page."""
|
"""Render single company analysis page."""
|
||||||
st.header("Company Patent Analysis")
|
st.markdown('<p class="section-header">Single Company Analysis</p>', unsafe_allow_html=True)
|
||||||
|
st.markdown("Analyze a company's patent portfolio using AI-powered insights.")
|
||||||
|
|
||||||
col1, col2 = st.columns([2, 1])
|
st.markdown("")
|
||||||
|
|
||||||
with col1:
|
# Search card
|
||||||
company_name = st.text_input(
|
with st.container():
|
||||||
"Company Name",
|
col1, col2 = st.columns([3, 1])
|
||||||
placeholder="e.g., nvidia, intel, amd",
|
|
||||||
help="Enter the company name to analyze their patent portfolio",
|
|
||||||
)
|
|
||||||
|
|
||||||
with col2:
|
with col1:
|
||||||
analyze_btn = st.button("Analyze", type="primary", use_container_width=True)
|
company_name = st.text_input(
|
||||||
|
"Company Name",
|
||||||
|
placeholder="Enter company name (e.g., nvidia, intel, amd)",
|
||||||
|
help="Enter the company name to analyze their patent portfolio",
|
||||||
|
label_visibility="collapsed",
|
||||||
|
)
|
||||||
|
|
||||||
|
with col2:
|
||||||
|
analyze_btn = st.button("🔍 Analyze", type="primary", use_container_width=True)
|
||||||
|
|
||||||
if analyze_btn and company_name:
|
if analyze_btn and company_name:
|
||||||
with st.spinner(f"Analyzing {company_name}..."):
|
with st.spinner(f"Analyzing {company_name}..."):
|
||||||
@@ -81,45 +381,57 @@ def render_company_analysis():
|
|||||||
result = analyzer._analyze_company_safe(company_name)
|
result = analyzer._analyze_company_safe(company_name)
|
||||||
|
|
||||||
if result.success:
|
if result.success:
|
||||||
st.success(f"Analysis complete for {company_name}")
|
st.success(f"✓ Analysis complete for {company_name.upper()}")
|
||||||
|
|
||||||
# Metrics row
|
st.markdown("")
|
||||||
|
|
||||||
|
# Metrics row with custom styling
|
||||||
col1, col2, col3 = st.columns(3)
|
col1, col2, col3 = st.columns(3)
|
||||||
with col1:
|
with col1:
|
||||||
st.metric("Patents Analyzed", result.patent_count)
|
st.metric("Patents Found", result.patent_count)
|
||||||
with col2:
|
with col2:
|
||||||
st.metric("Status", "Success")
|
st.metric("Analysis Status", "Complete")
|
||||||
with col3:
|
with col3:
|
||||||
st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))
|
st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))
|
||||||
|
|
||||||
# Analysis content
|
st.markdown("")
|
||||||
st.subheader("AI Analysis")
|
|
||||||
st.markdown(result.analysis)
|
# Analysis content in a styled container
|
||||||
|
st.markdown('<p class="section-header">AI Analysis Results</p>', unsafe_allow_html=True)
|
||||||
|
with st.container():
|
||||||
|
st.markdown(result.analysis)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
st.error(f"Analysis failed: {result.error}")
|
st.error(f"Analysis failed: {result.error}")
|
||||||
|
|
||||||
|
elif not company_name and analyze_btn:
|
||||||
|
st.warning("Please enter a company name to analyze.")
|
||||||
|
|
||||||
|
|
||||||
def render_batch_analysis():
|
def render_batch_analysis():
|
||||||
"""Render batch analysis page."""
|
"""Render batch analysis page."""
|
||||||
st.header("Batch Company Analysis")
|
st.markdown('<p class="section-header">Batch Company Analysis</p>', unsafe_allow_html=True)
|
||||||
|
st.markdown("Analyze multiple companies simultaneously for comparative insights.")
|
||||||
|
|
||||||
st.markdown(
|
st.markdown("")
|
||||||
"Analyze multiple companies simultaneously. Enter company names separated by commas or newlines."
|
|
||||||
)
|
|
||||||
|
|
||||||
companies_input = st.text_area(
|
# Input section
|
||||||
"Company Names",
|
col1, col2 = st.columns([2, 1])
|
||||||
placeholder="nvidia\namd\nintel\nqualcomm",
|
|
||||||
height=150,
|
|
||||||
)
|
|
||||||
|
|
||||||
col1, col2 = st.columns(2)
|
|
||||||
with col1:
|
with col1:
|
||||||
max_workers = st.slider("Concurrent Workers", 1, 5, 3)
|
companies_input = st.text_area(
|
||||||
|
"Company Names",
|
||||||
|
placeholder="Enter company names (one per line or comma-separated):\nnvidia\namd\nintel\nqualcomm",
|
||||||
|
height=150,
|
||||||
|
label_visibility="collapsed",
|
||||||
|
)
|
||||||
|
|
||||||
with col2:
|
with col2:
|
||||||
|
st.markdown("**Configuration**")
|
||||||
|
max_workers = st.slider("Concurrent Workers", 1, 5, 3, help="Number of parallel analysis threads")
|
||||||
|
st.markdown("")
|
||||||
analyze_btn = st.button(
|
analyze_btn = st.button(
|
||||||
"Run Batch Analysis", type="primary", use_container_width=True
|
"🚀 Run Batch Analysis", type="primary", use_container_width=True
|
||||||
)
|
)
|
||||||
|
|
||||||
if analyze_btn and companies_input:
|
if analyze_btn and companies_input:
|
||||||
@@ -134,7 +446,7 @@ def render_batch_analysis():
|
|||||||
st.warning("Please enter at least one company name")
|
st.warning("Please enter at least one company name")
|
||||||
return
|
return
|
||||||
|
|
||||||
st.info(f"Starting analysis of {len(companies)} companies...")
|
st.info(f"🔄 Starting analysis of {len(companies)} companies...")
|
||||||
|
|
||||||
# Progress tracking
|
# Progress tracking
|
||||||
progress_bar = st.progress(0)
|
progress_bar = st.progress(0)
|
||||||
@@ -154,10 +466,12 @@ def render_batch_analysis():
|
|||||||
)
|
)
|
||||||
|
|
||||||
progress_bar.progress(1.0)
|
progress_bar.progress(1.0)
|
||||||
status_text.text("Analysis complete!")
|
status_text.text("✓ Analysis complete!")
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# Summary metrics
|
# Summary metrics
|
||||||
st.subheader("Results Summary")
|
st.markdown('<p class="section-header">Results Summary</p>', unsafe_allow_html=True)
|
||||||
col1, col2, col3, col4 = st.columns(4)
|
col1, col2, col3, col4 = st.columns(4)
|
||||||
with col1:
|
with col1:
|
||||||
st.metric("Total Companies", result.total_companies)
|
st.metric("Total Companies", result.total_companies)
|
||||||
@@ -178,7 +492,7 @@ def render_batch_analysis():
|
|||||||
df = pd.DataFrame(
|
df = pd.DataFrame(
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"Company": r.company_name,
|
"Company": r.company_name.upper(),
|
||||||
"Patents": r.patent_count,
|
"Patents": r.patent_count,
|
||||||
"Status": "Success" if r.success else "Failed",
|
"Status": "Success" if r.success else "Failed",
|
||||||
}
|
}
|
||||||
@@ -191,16 +505,34 @@ def render_batch_analysis():
|
|||||||
x="Company",
|
x="Company",
|
||||||
y="Patents",
|
y="Patents",
|
||||||
color="Status",
|
color="Status",
|
||||||
color_discrete_map={"Success": "#28a745", "Failed": "#dc3545"},
|
color_discrete_map={"Success": "#10b981", "Failed": "#ef4444"},
|
||||||
title="Patents per Company",
|
title="",
|
||||||
|
)
|
||||||
|
fig.update_layout(
|
||||||
|
plot_bgcolor="rgba(0,0,0,0)",
|
||||||
|
paper_bgcolor="rgba(0,0,0,0)",
|
||||||
|
font_color="#94a3b8",
|
||||||
|
legend=dict(
|
||||||
|
orientation="h",
|
||||||
|
yanchor="bottom",
|
||||||
|
y=1.02,
|
||||||
|
xanchor="right",
|
||||||
|
x=1
|
||||||
|
),
|
||||||
|
xaxis=dict(showgrid=False),
|
||||||
|
yaxis=dict(showgrid=True, gridcolor="rgba(99, 102, 241, 0.1)"),
|
||||||
)
|
)
|
||||||
st.plotly_chart(fig, use_container_width=True)
|
st.plotly_chart(fig, use_container_width=True)
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# Individual results
|
# Individual results
|
||||||
st.subheader("Individual Results")
|
st.markdown('<p class="section-header">Detailed Results</p>', unsafe_allow_html=True)
|
||||||
for r in result.results:
|
for r in result.results:
|
||||||
|
status_icon = "✓" if r.success else "✗"
|
||||||
|
status_class = "status-success" if r.success else "status-error"
|
||||||
with st.expander(
|
with st.expander(
|
||||||
f"{'✓' if r.success else '✗'} {r.company_name} ({r.patent_count} patents)"
|
f"{status_icon} {r.company_name.upper()} — {r.patent_count} patents"
|
||||||
):
|
):
|
||||||
if r.success:
|
if r.success:
|
||||||
st.markdown(r.analysis)
|
st.markdown(r.analysis)
|
||||||
@@ -210,21 +542,28 @@ def render_batch_analysis():
|
|||||||
|
|
||||||
def render_analytics():
|
def render_analytics():
|
||||||
"""Render analytics page with database insights."""
|
"""Render analytics page with database insights."""
|
||||||
st.header("Analytics Dashboard")
|
st.markdown('<p class="section-header">Analytics Dashboard</p>', unsafe_allow_html=True)
|
||||||
|
st.markdown("Track historical analysis data and view insights.")
|
||||||
|
|
||||||
db_client = get_db_client()
|
db_client = get_db_client()
|
||||||
|
|
||||||
if not db_client:
|
if not db_client:
|
||||||
st.warning(
|
st.markdown("")
|
||||||
"Database mode is not enabled. Set USE_DATABASE=true in your .env file to enable analytics."
|
st.markdown("""
|
||||||
)
|
<div class="info-box">
|
||||||
st.info(
|
<strong>⚠️ Database Not Connected</strong><br>
|
||||||
"Analytics features require storing analysis results in PostgreSQL for historical tracking."
|
<span style="color: #94a3b8;">Set <code>USE_DATABASE=true</code> in your .env file to enable analytics tracking.</span>
|
||||||
)
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
st.info("Analytics features require storing analysis results in PostgreSQL for historical tracking.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# Time range selector
|
# Time range selector
|
||||||
days = st.selectbox("Time Range", [7, 14, 30, 90], index=0)
|
col1, col2, col3 = st.columns([1, 2, 1])
|
||||||
|
with col1:
|
||||||
|
days = st.selectbox("Time Range", [7, 14, 30, 90], index=0, format_func=lambda x: f"Last {x} days")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
analytics = db_client.get_analytics(days=days)
|
analytics = db_client.get_analytics(days=days)
|
||||||
@@ -233,8 +572,9 @@ def render_analytics():
|
|||||||
st.info("No analytics data available yet. Run some analyses first!")
|
st.info("No analytics data available yet. Run some analyses first!")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# Summary metrics
|
# Summary metrics
|
||||||
st.subheader("Summary")
|
|
||||||
col1, col2, col3 = st.columns(3)
|
col1, col2, col3 = st.columns(3)
|
||||||
|
|
||||||
with col1:
|
with col1:
|
||||||
@@ -249,6 +589,8 @@ def render_analytics():
|
|||||||
types = len(analytics.get("by_type", {}))
|
types = len(analytics.get("by_type", {}))
|
||||||
st.metric("Analysis Types", types)
|
st.metric("Analysis Types", types)
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# Charts
|
# Charts
|
||||||
col1, col2 = st.columns(2)
|
col1, col2 = st.columns(2)
|
||||||
|
|
||||||
@@ -256,10 +598,17 @@ def render_analytics():
|
|||||||
by_company = analytics.get("by_company", {})
|
by_company = analytics.get("by_company", {})
|
||||||
if by_company:
|
if by_company:
|
||||||
df = pd.DataFrame(
|
df = pd.DataFrame(
|
||||||
[{"Company": k, "Count": v} for k, v in by_company.items()]
|
[{"Company": k.upper(), "Count": v} for k, v in by_company.items()]
|
||||||
)
|
)
|
||||||
fig = px.pie(
|
fig = px.pie(
|
||||||
df, values="Count", names="Company", title="Analyses by Company"
|
df, values="Count", names="Company", title="Distribution by Company",
|
||||||
|
hole=0.4,
|
||||||
|
color_discrete_sequence=px.colors.sequential.Purp_r,
|
||||||
|
)
|
||||||
|
fig.update_layout(
|
||||||
|
plot_bgcolor="rgba(0,0,0,0)",
|
||||||
|
paper_bgcolor="rgba(0,0,0,0)",
|
||||||
|
font_color="#94a3b8",
|
||||||
)
|
)
|
||||||
st.plotly_chart(fig, use_container_width=True)
|
st.plotly_chart(fig, use_container_width=True)
|
||||||
|
|
||||||
@@ -269,19 +618,29 @@ def render_analytics():
|
|||||||
df = pd.DataFrame(
|
df = pd.DataFrame(
|
||||||
[{"Type": k, "Count": v} for k, v in by_type.items()]
|
[{"Type": k, "Count": v} for k, v in by_type.items()]
|
||||||
)
|
)
|
||||||
fig = px.bar(df, x="Type", y="Count", title="Analyses by Type")
|
fig = px.bar(df, x="Type", y="Count", title="Analysis Types",
|
||||||
|
color_discrete_sequence=["#6366f1"])
|
||||||
|
fig.update_layout(
|
||||||
|
plot_bgcolor="rgba(0,0,0,0)",
|
||||||
|
paper_bgcolor="rgba(0,0,0,0)",
|
||||||
|
font_color="#94a3b8",
|
||||||
|
xaxis=dict(showgrid=False),
|
||||||
|
yaxis=dict(showgrid=True, gridcolor="rgba(99, 102, 241, 0.1)"),
|
||||||
|
)
|
||||||
st.plotly_chart(fig, use_container_width=True)
|
st.plotly_chart(fig, use_container_width=True)
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# Recent messages
|
# Recent messages
|
||||||
st.subheader("Recent Analyses")
|
st.markdown('<p class="section-header">Recent Analyses</p>', unsafe_allow_html=True)
|
||||||
messages = db_client.get_messages(limit=10)
|
messages = db_client.get_messages(limit=10)
|
||||||
|
|
||||||
if messages:
|
if messages:
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
with st.expander(
|
with st.expander(
|
||||||
f"{msg.get('company_name', 'Unknown')} - {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
|
f"📄 {msg.get('company_name', 'Unknown').upper()} — {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
|
||||||
):
|
):
|
||||||
st.markdown(f"**Model:** {msg.get('model', 'N/A')}")
|
st.markdown(f"**Model:** `{msg.get('model', 'N/A')}`")
|
||||||
if msg.get("response"):
|
if msg.get("response"):
|
||||||
st.markdown(msg["response"][:500] + "...")
|
st.markdown(msg["response"][:500] + "...")
|
||||||
|
|
||||||
@@ -291,70 +650,127 @@ def render_analytics():
|
|||||||
|
|
||||||
def render_about():
|
def render_about():
|
||||||
"""Render about page."""
|
"""Render about page."""
|
||||||
st.header("About SPARC")
|
st.markdown('<p class="section-header">About SPARC</p>', unsafe_allow_html=True)
|
||||||
|
|
||||||
st.markdown(
|
col1, col2 = st.columns([2, 1])
|
||||||
"""
|
|
||||||
**SPARC** (Semiconductor Patent & Analytics Report Core) is a patent analysis
|
|
||||||
system that estimates company performance by analyzing their patent portfolios
|
|
||||||
using LLM-powered insights.
|
|
||||||
|
|
||||||
### Features
|
with col1:
|
||||||
|
st.markdown("""
|
||||||
|
**SPARC** (Semiconductor Patent & Analytics Report Core) is an AI-powered patent analysis
|
||||||
|
platform that evaluates company performance by analyzing their patent portfolios
|
||||||
|
with cutting-edge language models.
|
||||||
|
""")
|
||||||
|
|
||||||
- **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
|
st.markdown("")
|
||||||
- **Intelligent Parsing**: Extracts key sections from patent PDFs
|
st.markdown("**Key Features**")
|
||||||
- **AI Analysis**: Uses Claude 3.5 Sonnet for deep analysis
|
|
||||||
- **Batch Processing**: Analyze multiple companies concurrently
|
|
||||||
- **REST API**: FastAPI web service for integration
|
|
||||||
- **Analytics**: Track and visualize analysis history
|
|
||||||
|
|
||||||
### Technology Stack
|
features = [
|
||||||
|
("🔍", "Patent Retrieval", "Automated collection via SerpAPI's Google Patents"),
|
||||||
|
("📄", "Intelligent Parsing", "Extracts key sections from patent documents"),
|
||||||
|
("🤖", "AI Analysis", "Deep analysis powered by Claude 3.5 Sonnet"),
|
||||||
|
("⚡", "Batch Processing", "Analyze multiple companies concurrently"),
|
||||||
|
("🌐", "REST API", "FastAPI web service for seamless integration"),
|
||||||
|
("📊", "Analytics", "Track and visualize historical analysis data"),
|
||||||
|
]
|
||||||
|
|
||||||
- **Backend**: Python, FastAPI
|
for icon, title, desc in features:
|
||||||
- **AI**: Claude 3.5 Sonnet via OpenRouter
|
st.markdown(f"""
|
||||||
- **Database**: PostgreSQL
|
<div class="feature-item">
|
||||||
- **Dashboard**: Streamlit, Plotly
|
<span class="feature-icon">{icon}</span>
|
||||||
- **Patent Data**: SerpAPI Google Patents
|
<div>
|
||||||
|
<strong>{title}</strong><br>
|
||||||
|
<span style="color: #94a3b8; font-size: 0.875rem;">{desc}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
### Links
|
with col2:
|
||||||
|
st.markdown("**Technology Stack**")
|
||||||
|
st.markdown("""
|
||||||
|
<div class="info-box">
|
||||||
|
<div style="display: grid; gap: 0.5rem;">
|
||||||
|
<div><span style="color: #6366f1;">Backend</span><br><span style="color: #94a3b8;">Python, FastAPI</span></div>
|
||||||
|
<div><span style="color: #6366f1;">AI Model</span><br><span style="color: #94a3b8;">Claude 3.5 Sonnet</span></div>
|
||||||
|
<div><span style="color: #6366f1;">Database</span><br><span style="color: #94a3b8;">PostgreSQL</span></div>
|
||||||
|
<div><span style="color: #6366f1;">Dashboard</span><br><span style="color: #94a3b8;">Streamlit, Plotly</span></div>
|
||||||
|
<div><span style="color: #6366f1;">Data Source</span><br><span style="color: #94a3b8;">SerpAPI Patents</span></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
- API Docs: `http://localhost:8000/docs`
|
st.markdown("")
|
||||||
- Health Check: `http://localhost:8000/health`
|
st.markdown("**API Endpoints**")
|
||||||
"""
|
st.code("http://localhost:8000/docs", language=None)
|
||||||
)
|
st.code("http://localhost:8000/health", language=None)
|
||||||
|
|
||||||
|
st.markdown("")
|
||||||
|
st.markdown("")
|
||||||
|
|
||||||
# System status
|
# System status
|
||||||
st.subheader("System Status")
|
st.markdown('<p class="section-header">System Status</p>', unsafe_allow_html=True)
|
||||||
|
|
||||||
col1, col2 = st.columns(2)
|
col1, col2, col3 = st.columns(3)
|
||||||
|
|
||||||
with col1:
|
with col1:
|
||||||
db_client = get_db_client()
|
db_client = get_db_client()
|
||||||
if db_client:
|
if db_client:
|
||||||
st.success("Database: Connected")
|
st.markdown("""
|
||||||
|
<div class="metric-card">
|
||||||
|
<div style="color: #10b981; font-size: 1.5rem;">●</div>
|
||||||
|
<div class="metric-label">Database</div>
|
||||||
|
<div style="color: #10b981; font-weight: 600;">Connected</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
else:
|
else:
|
||||||
st.warning("Database: Not configured")
|
st.markdown("""
|
||||||
|
<div class="metric-card">
|
||||||
|
<div style="color: #f59e0b; font-size: 1.5rem;">●</div>
|
||||||
|
<div class="metric-label">Database</div>
|
||||||
|
<div style="color: #f59e0b; font-weight: 600;">Not Configured</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
with col2:
|
with col2:
|
||||||
analyzer = get_analyzer()
|
analyzer = get_analyzer()
|
||||||
if analyzer:
|
if analyzer:
|
||||||
st.success("Analyzer: Ready")
|
st.markdown("""
|
||||||
|
<div class="metric-card">
|
||||||
|
<div style="color: #10b981; font-size: 1.5rem;">●</div>
|
||||||
|
<div class="metric-label">Analyzer</div>
|
||||||
|
<div style="color: #10b981; font-weight: 600;">Ready</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
else:
|
else:
|
||||||
st.error("Analyzer: Not initialized")
|
st.markdown("""
|
||||||
|
<div class="metric-card">
|
||||||
|
<div style="color: #ef4444; font-size: 1.5rem;">●</div>
|
||||||
|
<div class="metric-label">Analyzer</div>
|
||||||
|
<div style="color: #ef4444; font-weight: 600;">Not Initialized</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
|
with col3:
|
||||||
|
st.markdown("""
|
||||||
|
<div class="metric-card">
|
||||||
|
<div style="color: #10b981; font-size: 1.5rem;">●</div>
|
||||||
|
<div class="metric-label">Dashboard</div>
|
||||||
|
<div style="color: #10b981; font-weight: 600;">Online</div>
|
||||||
|
</div>
|
||||||
|
""", unsafe_allow_html=True)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main dashboard entry point."""
|
"""Main dashboard entry point."""
|
||||||
render_header()
|
render_header()
|
||||||
page = render_sidebar()
|
tabs = render_navigation()
|
||||||
|
|
||||||
if page == "Company Analysis":
|
with tabs[0]:
|
||||||
render_company_analysis()
|
render_company_analysis()
|
||||||
elif page == "Batch Analysis":
|
with tabs[1]:
|
||||||
render_batch_analysis()
|
render_batch_analysis()
|
||||||
elif page == "Analytics":
|
with tabs[2]:
|
||||||
render_analytics()
|
render_analytics()
|
||||||
elif page == "About":
|
with tabs[3]:
|
||||||
render_about()
|
render_about()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+42
-8
@@ -12,25 +12,59 @@ services:
|
|||||||
- postgres_data:/var/lib/postgresql/data
|
- postgres_data:/var/lib/postgresql/data
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
app:
|
init-db:
|
||||||
build:
|
build: .
|
||||||
context: .
|
container_name: sparc-init-db
|
||||||
dockerfile: Dockerfile
|
command: python scripts/init_database.py
|
||||||
container_name: sparc-app
|
environment:
|
||||||
|
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||||
|
USE_DATABASE: "true"
|
||||||
depends_on:
|
depends_on:
|
||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
restart: "no"
|
||||||
|
|
||||||
|
api:
|
||||||
|
build: .
|
||||||
|
container_name: sparc-api
|
||||||
|
command: uvicorn SPARC.api:app --host 0.0.0.0 --port 8000
|
||||||
environment:
|
environment:
|
||||||
USE_DATABASE: true
|
API_KEY: ${API_KEY}
|
||||||
|
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||||
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||||
|
USE_DATABASE: "true"
|
||||||
ports:
|
ports:
|
||||||
- "8000:8000"
|
- "8000:8000"
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
init-db:
|
||||||
|
condition: service_completed_successfully
|
||||||
volumes:
|
volumes:
|
||||||
- .:/app
|
- ./patents:/app/patents
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
dashboard:
|
||||||
|
build: .
|
||||||
|
container_name: sparc-dashboard
|
||||||
|
command: streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
||||||
|
environment:
|
||||||
|
API_KEY: ${API_KEY}
|
||||||
|
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||||
|
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||||
|
USE_DATABASE: "true"
|
||||||
|
ports:
|
||||||
|
- "8501:8501"
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
|
volumes:
|
||||||
|
- ./patents:/app/patents
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
|
|||||||
+62
-106
@@ -55,28 +55,25 @@ USE_DATABASE=true
|
|||||||
## Step 2: Start Services with Docker Compose
|
## Step 2: Start Services with Docker Compose
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Start PostgreSQL database
|
# Start all services (PostgreSQL, API, and Dashboard)
|
||||||
docker-compose up -d postgres
|
docker-compose up -d
|
||||||
|
|
||||||
# Wait for postgres to be healthy (check with)
|
# Check status
|
||||||
docker-compose ps
|
docker-compose ps
|
||||||
|
|
||||||
# You should see sparc-postgres with status "healthy"
|
# You should see:
|
||||||
|
# - sparc-postgres (healthy)
|
||||||
|
# - sparc-api (running on port 8000)
|
||||||
|
# - sparc-dashboard (running on port 8501)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The database is automatically initialized by the `init-db` service.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Step 3: Initialize the Database
|
## Step 3: Database Schema
|
||||||
|
|
||||||
```bash
|
The `init-db` service automatically creates the `llm_messages` table with the following schema:
|
||||||
# Option A: If running locally with Python
|
|
||||||
python scripts/init_database.py
|
|
||||||
|
|
||||||
# Option B: If using Docker, run inside container
|
|
||||||
docker-compose run --rm sparc-app python scripts/init_database.py
|
|
||||||
```
|
|
||||||
|
|
||||||
This creates the `llm_messages` table with the following schema:
|
|
||||||
|
|
||||||
| Column | Type | Purpose |
|
| Column | Type | Purpose |
|
||||||
|--------|------|---------|
|
|--------|------|---------|
|
||||||
@@ -95,9 +92,30 @@ This creates the `llm_messages` table with the following schema:
|
|||||||
|
|
||||||
## Step 4: Run the Services
|
## Step 4: Run the Services
|
||||||
|
|
||||||
### Option A: Run Locally (Development)
|
### Option A: Run with Docker Compose (Recommended)
|
||||||
|
|
||||||
|
All services are started automatically with `docker-compose up -d` from Step 2.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# View logs
|
||||||
|
docker-compose logs -f
|
||||||
|
|
||||||
|
# View specific service logs
|
||||||
|
docker-compose logs -f api
|
||||||
|
docker-compose logs -f dashboard
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Run Locally (Development)
|
||||||
|
|
||||||
|
If you prefer running services locally without Docker:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start PostgreSQL with Docker
|
||||||
|
docker-compose up -d postgres
|
||||||
|
|
||||||
|
# Wait for database to be healthy, then initialize
|
||||||
|
python scripts/init_database.py
|
||||||
|
|
||||||
# Terminal 1: Start FastAPI backend
|
# Terminal 1: Start FastAPI backend
|
||||||
uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
|
||||||
@@ -105,14 +123,6 @@ uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
|||||||
streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
||||||
```
|
```
|
||||||
|
|
||||||
### Option B: Run with Docker (Production)
|
|
||||||
|
|
||||||
See [Production Docker Compose](#production-docker-compose) section below for a complete `docker-compose.prod.yml` configuration.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker-compose -f docker-compose.prod.yml up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Step 5: Verify Deployment
|
## Step 5: Verify Deployment
|
||||||
@@ -256,97 +266,41 @@ postgresql://postgres:postgres@localhost:5432/sparc
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Production Docker Compose
|
## Docker Compose Services
|
||||||
|
|
||||||
Create a `docker-compose.prod.yml` file for full production deployment:
|
The `docker-compose.yml` includes all services needed for production:
|
||||||
|
|
||||||
```yaml
|
| Service | Container | Port | Description |
|
||||||
version: '3.8'
|
|---------|-----------|------|-------------|
|
||||||
|
| `postgres` | sparc-postgres | 5432 | PostgreSQL database |
|
||||||
|
| `init-db` | sparc-init-db | - | One-time database initialization |
|
||||||
|
| `api` | sparc-api | 8000 | FastAPI REST API |
|
||||||
|
| `dashboard` | sparc-dashboard | 8501 | Streamlit web UI |
|
||||||
|
|
||||||
services:
|
### Common Docker Compose Commands
|
||||||
postgres:
|
|
||||||
image: postgres:16-alpine
|
|
||||||
container_name: sparc-postgres
|
|
||||||
environment:
|
|
||||||
POSTGRES_USER: postgres
|
|
||||||
POSTGRES_PASSWORD: postgres
|
|
||||||
POSTGRES_DB: sparc
|
|
||||||
volumes:
|
|
||||||
- postgres_data:/var/lib/postgresql/data
|
|
||||||
ports:
|
|
||||||
- "5432:5432"
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
|
||||||
interval: 5s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 5
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
api:
|
|
||||||
build: .
|
|
||||||
container_name: sparc-api
|
|
||||||
command: uvicorn SPARC.api:app --host 0.0.0.0 --port 8000
|
|
||||||
environment:
|
|
||||||
- API_KEY=${API_KEY}
|
|
||||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
|
||||||
- DATABASE_URL=postgresql://postgres:postgres@postgres:5432/sparc
|
|
||||||
- USE_DATABASE=true
|
|
||||||
ports:
|
|
||||||
- "8000:8000"
|
|
||||||
depends_on:
|
|
||||||
postgres:
|
|
||||||
condition: service_healthy
|
|
||||||
volumes:
|
|
||||||
- ./patents:/app/patents
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
dashboard:
|
|
||||||
build: .
|
|
||||||
container_name: sparc-dashboard
|
|
||||||
command: streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0
|
|
||||||
environment:
|
|
||||||
- API_KEY=${API_KEY}
|
|
||||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
|
||||||
- DATABASE_URL=postgresql://postgres:postgres@postgres:5432/sparc
|
|
||||||
- USE_DATABASE=true
|
|
||||||
ports:
|
|
||||||
- "8501:8501"
|
|
||||||
depends_on:
|
|
||||||
- api
|
|
||||||
volumes:
|
|
||||||
- ./patents:/app/patents
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
init-db:
|
|
||||||
build: .
|
|
||||||
container_name: sparc-init-db
|
|
||||||
command: python scripts/init_database.py
|
|
||||||
environment:
|
|
||||||
- DATABASE_URL=postgresql://postgres:postgres@postgres:5432/sparc
|
|
||||||
- USE_DATABASE=true
|
|
||||||
depends_on:
|
|
||||||
postgres:
|
|
||||||
condition: service_healthy
|
|
||||||
restart: "no"
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
postgres_data:
|
|
||||||
```
|
|
||||||
|
|
||||||
### Deploy with Production Compose
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Start all services
|
# Start all services
|
||||||
docker-compose -f docker-compose.prod.yml up -d
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Start with rebuild (after code changes)
|
||||||
|
docker-compose up -d --build
|
||||||
|
|
||||||
# View logs
|
# View logs
|
||||||
docker-compose -f docker-compose.prod.yml logs -f
|
docker-compose logs -f
|
||||||
|
|
||||||
|
# View specific service logs
|
||||||
|
docker-compose logs -f api
|
||||||
|
docker-compose logs -f dashboard
|
||||||
|
|
||||||
# Stop all services
|
# Stop all services
|
||||||
docker-compose -f docker-compose.prod.yml down
|
docker-compose down
|
||||||
|
|
||||||
# Stop and remove volumes (WARNING: deletes data)
|
# Stop and remove volumes (WARNING: deletes data)
|
||||||
docker-compose -f docker-compose.prod.yml down -v
|
docker-compose down -v
|
||||||
|
|
||||||
|
# Restart a specific service
|
||||||
|
docker-compose restart api
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -417,7 +371,12 @@ docker-compose logs -f dashboard
|
|||||||
## Quick Reference
|
## Quick Reference
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Development setup
|
# Docker setup (recommended)
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with API keys
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Local development setup
|
||||||
cp .env.example .env
|
cp .env.example .env
|
||||||
# Edit .env with API keys
|
# Edit .env with API keys
|
||||||
docker-compose up -d postgres
|
docker-compose up -d postgres
|
||||||
@@ -425,9 +384,6 @@ python scripts/init_database.py
|
|||||||
uvicorn SPARC.api:app --reload &
|
uvicorn SPARC.api:app --reload &
|
||||||
streamlit run dashboard.py
|
streamlit run dashboard.py
|
||||||
|
|
||||||
# Production setup
|
|
||||||
docker-compose -f docker-compose.prod.yml up -d
|
|
||||||
|
|
||||||
# Check status
|
# Check status
|
||||||
curl http://localhost:8000/health
|
curl http://localhost:8000/health
|
||||||
open http://localhost:8501
|
open http://localhost:8501
|
||||||
|
|||||||
@@ -20,6 +20,14 @@
|
|||||||
packages = [
|
packages = [
|
||||||
python
|
python
|
||||||
pkgs.python311Packages.virtualenv # gives `virtualenv` tool
|
pkgs.python311Packages.virtualenv # gives `virtualenv` tool
|
||||||
|
pkgs.zlib
|
||||||
|
pkgs.stdenv.cc.cc.lib
|
||||||
|
];
|
||||||
|
|
||||||
|
# Required for numpy and other C extension packages
|
||||||
|
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
||||||
|
pkgs.zlib
|
||||||
|
pkgs.stdenv.cc.cc.lib
|
||||||
];
|
];
|
||||||
|
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
|
|||||||
@@ -11,4 +11,5 @@ uvicorn[standard]
|
|||||||
httpx
|
httpx
|
||||||
streamlit
|
streamlit
|
||||||
plotly
|
plotly
|
||||||
|
numpy
|
||||||
pandas
|
pandas
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Estimate token usage per company portfolio for SPARC analysis."""
|
||||||
|
|
||||||
|
import tiktoken
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TokenEstimate:
    """Token usage and cost estimate for one company portfolio analysis."""

    company_name: str  # company the estimate was computed for
    patent_count: int  # number of patents included in the estimate
    prompt_tokens: int  # input tokens counted across all prompts
    estimated_completion_tokens: int  # output-token budget assumed for the replies
    total_tokens: int  # prompt_tokens + estimated_completion_tokens
    cost_estimate_usd: float  # input cost + output cost, in US dollars


class TokenEstimator:
    """Estimate token usage for SPARC patent analysis."""

    # Claude 3.5 Sonnet pricing via OpenRouter (per 1M tokens)
    INPUT_COST_PER_1M = 3.00  # $3.00 per 1M input tokens
    OUTPUT_COST_PER_1M = 15.00  # $15.00 per 1M output tokens

    # Estimated output tokens based on max_tokens settings; these are the
    # ceilings, so completion figures are an upper bound on real usage.
    SINGLE_PATENT_MAX_OUTPUT = 1024
    PORTFOLIO_MAX_OUTPUT = 2048

    # Patent-like filler used by estimate_from_sample(). A run of a single
    # repeated character is merged into very few tokens by a BPE tokenizer,
    # which makes sample estimates unrepresentative; ordinary prose tokenizes
    # much more like real patent text.
    SAMPLE_PATENT_TEXT = (
        "ABSTRACT: A method and system for processing semiconductor wafers "
        "using an adaptive control loop. CLAIMS: 1. A computer-implemented "
        "method comprising receiving sensor measurements, estimating a "
        "process state, and adjusting deposition parameters in response. "
        "SUMMARY: The present disclosure improves yield and reduces "
        "variability across manufacturing lots. "
    )

    def __init__(self):
        """Load the tokenizer used for counting."""
        # cl100k_base is used as a stand-in for Claude's tokenizer, so every
        # count produced by this class is an approximation.
        self.encoder = tiktoken.get_encoding("cl100k_base")

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens the encoder produces for ``text``."""
        return len(self.encoder.encode(text))

    def build_single_patent_prompt(self, patent_content: str, company_name: str) -> str:
        """Build the prompt for single patent analysis (matches llm.py)."""
        return f"""You are a patent analyst evaluating {company_name}'s innovation strategy.

Analyze the following patent content and provide insights on:
1. Innovation quality and novelty
2. Technical complexity and defensibility
3. Market potential and commercial viability
4. Strategic positioning relative to industry trends

Patent Content:
{patent_content}

Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""

    def build_portfolio_prompt(self, patents_data: List[Dict[str, str]], company_name: str) -> str:
        """Build the prompt for portfolio analysis (matches llm.py).

        Args:
            patents_data: List of dicts with 'patent_id' and 'content' keys.
            company_name: Name of the company being analyzed.

        Raises:
            KeyError: If a patent dict lacks 'patent_id' or 'content'.
        """
        portfolio_summary = [
            f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
            for idx, patent in enumerate(patents_data, 1)
        ]
        combined_content = "\n\n---\n\n".join(portfolio_summary)

        return f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.

You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:

1. Innovation Trends: What technology areas are they focusing on?
2. Strategic Direction: What does this reveal about their business strategy?
3. Competitive Position: How defensible are these innovations?
4. Market Outlook: What market opportunities do these patents target?
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?

Patent Portfolio:
{combined_content}

Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""

    def estimate_portfolio(
        self,
        company_name: str,
        patents_data: List[Dict[str, str]],
        include_individual_patents: bool = False
    ) -> TokenEstimate:
        """Estimate tokens for a company portfolio analysis.

        Args:
            company_name: Name of the company
            patents_data: List of dicts with 'patent_id' and 'content' keys
            include_individual_patents: If True, also count individual patent analysis calls

        Returns:
            A TokenEstimate whose completion figure is the sum of the
            max-output budgets of every call that was counted.
        """
        # Portfolio analysis tokens
        portfolio_prompt = self.build_portfolio_prompt(patents_data, company_name)
        prompt_tokens = self.count_tokens(portfolio_prompt)
        completion_tokens = self.PORTFOLIO_MAX_OUTPUT

        # Optionally add one single-patent call per patent
        if include_individual_patents:
            for patent in patents_data:
                single_prompt = self.build_single_patent_prompt(patent['content'], company_name)
                prompt_tokens += self.count_tokens(single_prompt)
                completion_tokens += self.SINGLE_PATENT_MAX_OUTPUT

        total_tokens = prompt_tokens + completion_tokens

        # Calculate cost: input and output tokens are priced separately
        input_cost = (prompt_tokens / 1_000_000) * self.INPUT_COST_PER_1M
        output_cost = (completion_tokens / 1_000_000) * self.OUTPUT_COST_PER_1M
        total_cost = input_cost + output_cost

        return TokenEstimate(
            company_name=company_name,
            patent_count=len(patents_data),
            prompt_tokens=prompt_tokens,
            estimated_completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            cost_estimate_usd=total_cost
        )

    def estimate_from_sample(
        self,
        company_name: str,
        patent_count: int = 10,
        avg_patent_chars: int = 5000
    ) -> TokenEstimate:
        """Estimate tokens using sample/average patent sizes.

        Args:
            company_name: Name of the company
            patent_count: Number of patents (default 10, typical from SERP)
            avg_patent_chars: Average characters per minimized patent content

        Returns:
            The portfolio-only TokenEstimate for ``patent_count`` synthetic
            patents, each exactly ``avg_patent_chars`` characters long
            (empty when ``avg_patent_chars`` is not positive).
        """
        # Tile the prose filler and cut it to the requested length so the
        # placeholder tokenizes like text rather than like a run of one letter.
        target_chars = max(avg_patent_chars, 0)
        filler = self.SAMPLE_PATENT_TEXT
        repeats = target_chars // len(filler) + 1
        sample_content = (filler * repeats)[:target_chars]

        patents_data = [
            {"patent_id": f"US{10000000 + i}", "content": sample_content}
            for i in range(patent_count)
        ]

        return self.estimate_portfolio(company_name, patents_data)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print three worked token-estimation examples to stdout.

    The examples are: a fixed set of companies using sample-sized patents,
    the same estimate at several portfolio sizes, and a two-patent portfolio
    built from hand-written patent text.
    """
    estimator = TokenEstimator()
    heavy_rule = "=" * 70

    print(heavy_rule)
    print("SPARC Token Usage Estimator")
    print(heavy_rule)

    # Example 1: Estimate with sample data
    print("\n📊 Sample Estimates (10 patents, ~5000 chars each):\n")

    companies = ["Apple Inc.", "Microsoft Corporation", "Tesla Motors", "Google LLC"]

    running_tokens = 0
    running_cost = 0.0

    for name in companies:
        result = estimator.estimate_from_sample(name, patent_count=10, avg_patent_chars=5000)
        report_lines = [
            f" {name}:",
            f" Patents: {result.patent_count}",
            f" Prompt tokens: {result.prompt_tokens:,}",
            f" Est. completion tokens: {result.estimated_completion_tokens:,}",
            f" Total tokens: {result.total_tokens:,}",
            f" Est. cost: ${result.cost_estimate_usd:.4f}",
        ]
        for line in report_lines:
            print(line)
        print()

        running_tokens += result.total_tokens
        running_cost += result.cost_estimate_usd

    print("-" * 70)
    print(f" TOTAL for {len(companies)} companies:")
    print(f" Total tokens: {running_tokens:,}")
    print(f" Total est. cost: ${running_cost:.4f}")

    # Example 2: Different portfolio sizes
    print("\n" + heavy_rule)
    print("📈 Token Scaling by Portfolio Size:")
    print(heavy_rule + "\n")

    for size in (5, 10, 15, 20):
        result = estimator.estimate_from_sample("Sample Corp", patent_count=size)
        print(f" {size} patents: {result.prompt_tokens:,} prompt tokens, ${result.cost_estimate_usd:.4f}")

    # Example 3: With actual patent content (simulated)
    print("\n" + heavy_rule)
    print("📝 Example with Real Patent Structure:")
    print(heavy_rule + "\n")

    ml_patent = {
        "patent_id": "US11234567",
        "content": """ABSTRACT: A method for machine learning optimization using gradient descent.

CLAIMS:
1. A computer-implemented method comprising:
receiving input data;
processing the input data through a neural network;
optimizing weights using backpropagation.

SUMMARY: This invention relates to improvements in neural network training efficiency."""
    }
    cloud_patent = {
        "patent_id": "US11234568",
        "content": """ABSTRACT: System for distributed computing in cloud environments.

CLAIMS:
1. A distributed system comprising:
a plurality of compute nodes;
a load balancer;
a message queue for task distribution.

SUMMARY: The present disclosure improves cloud computing resource allocation."""
    }

    result = estimator.estimate_portfolio("Tech Corp", [ml_patent, cloud_patent])
    for line in (
        f" Company: {result.company_name}",
        f" Patents analyzed: {result.patent_count}",
        f" Prompt tokens: {result.prompt_tokens:,}",
        f" Est. completion: {result.estimated_completion_tokens:,}",
        f" Total: {result.total_tokens:,}",
        f" Est. cost: ${result.cost_estimate_usd:.4f}",
    ):
        print(line)


# Run the examples only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user