feat: add Streamlit visualization dashboard
Build and Push Docker Image / build-and-push (push) Has been cancelled

- Create interactive dashboard with company analysis page
- Add batch analysis with progress tracking and charts
- Include analytics page for historical data visualization
- Add system status monitoring
- Update requirements.txt with streamlit, plotly, pandas
- Document dashboard usage in README

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-03-12 23:28:30 -04:00
parent 3479ba8a46
commit d4ba13846a
3 changed files with 383 additions and 1 deletions
+18 -1
View File
@@ -17,6 +17,7 @@ SPARC automatically collects, parses, and analyzes patents from companies to pro
- **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights - **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights
- **Batch Processing**: Analyze multiple companies concurrently with progress tracking - **Batch Processing**: Analyze multiple companies concurrently with progress tracking
- **REST API**: FastAPI web service with async job support - **REST API**: FastAPI web service with async job support
- **Dashboard**: Interactive Streamlit visualization dashboard
- **Robust Testing**: 40 tests covering all major functionality - **Robust Testing**: 40 tests covering all major functionality
## Architecture ## Architecture
@@ -167,6 +168,22 @@ curl -X POST http://localhost:8000/analyze/batch/async \
-d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}' -d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}'
``` ```
### Visualization Dashboard
Launch the interactive Streamlit dashboard:
```bash
streamlit run dashboard.py
```
Dashboard features:
- **Company Analysis**: Analyze individual companies with real-time results
- **Batch Analysis**: Process multiple companies with progress tracking and charts
- **Analytics**: View historical analysis data and trends (requires database mode, i.e. `USE_DATABASE=true` in your `.env` file)
- **System Status**: Check database and analyzer health (shown on the dashboard's About page)
The dashboard runs at `http://localhost:8501` by default.
## Running Tests ## Running Tests
```bash ```bash
@@ -202,7 +219,7 @@ pytest tests/ --cov=SPARC --cov-report=term-missing
- [X] FastAPI web service wrapper - [X] FastAPI web service wrapper
- [X] Docker containerization - [X] Docker containerization
- [X] Results persistence (database) - [X] Results persistence (database)
- [ ] Visualization dashboard - [X] Visualization dashboard
## Development ## Development
+362
View File
@@ -0,0 +1,362 @@
"""SPARC Visualization Dashboard.
A Streamlit-based dashboard for visualizing patent analysis results.
Run with: streamlit run dashboard.py
"""
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime, timedelta
from SPARC.analyzer import CompanyAnalyzer
from SPARC.database import DatabaseClient
from SPARC import config
# Page-level Streamlit settings: browser-tab title and icon, a wide layout,
# and a sidebar that starts expanded.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* command - keep this call above the rest of the page
# code unless the pinned Streamlit version is confirmed to allow otherwise.
st.set_page_config(
page_title="SPARC Dashboard",
page_icon="📊",
layout="wide",
initial_sidebar_state="expanded",
)
@st.cache_resource
def get_analyzer():
    """Return the dashboard's CompanyAnalyzer, building it when needed.

    The function is wrapped in ``st.cache_resource``, so callers go through
    Streamlit's resource cache rather than constructing a new analyzer
    themselves.
    """
    analyzer = CompanyAnalyzer()
    return analyzer
@st.cache_resource
def _connect_db_client():
    """Create a DatabaseClient and open its connection.

    Errors are deliberately allowed to propagate: ``st.cache_resource`` stores
    return values only, so a failed attempt leaves nothing in the cache and
    the next call tries again.
    """
    client = DatabaseClient()
    client.connect()
    return client


def get_db_client():
    """Return the shared database client, or None when it is unavailable.

    Returns:
        The connected DatabaseClient when ``config.use_database`` is truthy
        and the connection succeeds; None when database mode is off or the
        connection attempt fails.

    Previously this function itself was cached and returned None from its
    ``except`` branch, so a single transient connection failure was cached
    and the database stayed "unavailable" until the app restarted. Only
    successful connections are cached now; failures are logged and retried
    on the next call.
    """
    if not config.use_database:
        return None
    try:
        return _connect_db_client()
    except Exception:
        # Best-effort by design: the pages degrade gracefully without a
        # database, but the reason must not vanish silently.
        import logging

        logging.getLogger(__name__).exception(
            "Database connection failed; database-backed features are unavailable"
        )
        return None
def render_header():
    """Draw the page title, the product tagline, and a horizontal rule."""
    st.title("SPARC Dashboard")
    # The tagline and the divider are both plain markdown blocks.
    for markdown_line in (
        "**Semiconductor Patent & Analytics Report Core**",
        "---",
    ):
        st.markdown(markdown_line)
def render_sidebar():
    """Draw the navigation sidebar and return the page the user selected.

    Returns:
        The label of the chosen radio option; ``main`` compares it against
        the page names to decide what to render.
    """
    page_names = ["Company Analysis", "Batch Analysis", "Analytics", "About"]
    sidebar = st.sidebar
    sidebar.title("Navigation")
    return sidebar.radio("Select Page", page_names)
def render_company_analysis():
    """Render the single-company analysis page.

    Shows a company-name input and an "Analyze" button. When the button is
    pressed with a usable name, the analyzer is run and either the result
    metrics and analysis text or the failure message is displayed.

    The name is stripped before use so whitespace-only input is not sent to
    the analyzer, and pressing the button without a name shows a warning
    instead of silently doing nothing.
    """
    st.header("Company Patent Analysis")

    input_col, button_col = st.columns([2, 1])
    with input_col:
        company_name = st.text_input(
            "Company Name",
            placeholder="e.g., nvidia, intel, amd",
            help="Enter the company name to analyze their patent portfolio",
        )
    with button_col:
        analyze_btn = st.button("Analyze", type="primary", use_container_width=True)

    if not analyze_btn:
        return

    company_name = (company_name or "").strip()
    if not company_name:
        st.warning("Please enter a company name")
        return

    with st.spinner(f"Analyzing {company_name}..."):
        analyzer = get_analyzer()
        # NOTE(review): this calls an underscore-prefixed method of
        # CompanyAnalyzer; switch to a public entry point if one exists.
        result = analyzer._analyze_company_safe(company_name)

    if not result.success:
        st.error(f"Analysis failed: {result.error}")
        return

    st.success(f"Analysis complete for {company_name}")

    # Metrics row
    count_col, status_col, time_col = st.columns(3)
    with count_col:
        st.metric("Patents Analyzed", result.patent_count)
    with status_col:
        st.metric("Status", "Success")
    with time_col:
        st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))

    # Analysis content
    st.subheader("AI Analysis")
    st.markdown(result.analysis)
def _parse_company_names(raw):
    """Split free-form input on commas and newlines, dropping blank entries."""
    stripped = (part.strip() for part in raw.replace(",", "\n").split("\n"))
    return [name for name in stripped if name]


def render_batch_analysis():
    """Render the batch analysis page.

    Collects a list of company names and a worker count, runs
    ``analyzer.analyze_companies`` with a progress callback, then shows
    summary metrics, a patents-per-company bar chart, and one expander per
    company with its analysis text or error.

    Fixes relative to the previous version:
    - pressing the button with an empty text area now shows the "enter at
      least one company" warning (it was only reachable for input made up of
      separators/whitespace);
    - the progress fraction is guarded against a zero total and clamped to
      the range the progress bar accepts;
    - each expander label carries a success/failure marker (the previous
      conditional produced an empty string in both branches).
    """
    st.header("Batch Company Analysis")
    st.markdown(
        "Analyze multiple companies simultaneously. Enter company names separated by commas or newlines."
    )

    companies_input = st.text_area(
        "Company Names",
        placeholder="nvidia\namd\nintel\nqualcomm",
        height=150,
    )

    col1, col2 = st.columns(2)
    with col1:
        max_workers = st.slider("Concurrent Workers", 1, 5, 3)
    with col2:
        analyze_btn = st.button(
            "Run Batch Analysis", type="primary", use_container_width=True
        )

    if not analyze_btn:
        return

    companies = _parse_company_names(companies_input or "")
    if not companies:
        st.warning("Please enter at least one company name")
        return

    st.info(f"Starting analysis of {len(companies)} companies...")

    # Progress tracking
    progress_bar = st.progress(0)
    status_text = st.empty()

    analyzer = get_analyzer()

    def update_progress(company: str, completed: int, total: int):
        # Guard the division and clamp: the progress bar only accepts
        # fractions between 0.0 and 1.0.
        fraction = min(max(completed / total, 0.0), 1.0) if total else 1.0
        progress_bar.progress(fraction)
        status_text.text(f"Analyzing {company}... ({completed}/{total})")

    # NOTE(review): if analyze_companies invokes the callback from worker
    # threads, Streamlit may not apply these widget updates - confirm which
    # thread the callback runs on.
    result = analyzer.analyze_companies(
        companies=companies,
        max_workers=max_workers,
        progress_callback=update_progress,
    )

    progress_bar.progress(1.0)
    status_text.text("Analysis complete!")

    # Summary metrics
    st.subheader("Results Summary")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Companies", result.total_companies)
    with col2:
        st.metric("Successful", result.successful)
    with col3:
        st.metric("Failed", result.failed)
    with col4:
        success_rate = (
            (result.successful / result.total_companies * 100)
            if result.total_companies > 0
            else 0
        )
        st.metric("Success Rate", f"{success_rate:.1f}%")

    # Results chart
    if result.results:
        df = pd.DataFrame(
            [
                {
                    "Company": r.company_name,
                    "Patents": r.patent_count,
                    "Status": "Success" if r.success else "Failed",
                }
                for r in result.results
            ]
        )
        fig = px.bar(
            df,
            x="Company",
            y="Patents",
            color="Status",
            color_discrete_map={"Success": "#28a745", "Failed": "#dc3545"},
            title="Patents per Company",
        )
        st.plotly_chart(fig, use_container_width=True)

    # Individual results
    st.subheader("Individual Results")
    for r in result.results:
        # The marker lets users spot failures without opening each expander.
        with st.expander(
            f"{'✅' if r.success else '❌'} {r.company_name} ({r.patent_count} patents)"
        ):
            if r.success:
                st.markdown(r.analysis)
            else:
                st.error(r.error)
def _response_preview(text, limit=500):
    """Return ``text`` cut to ``limit`` characters, adding "..." only if cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def render_analytics():
    """Render the analytics page backed by the database.

    Without a database client the page explains why and stops. Otherwise it
    lets the user pick a time range, then shows summary metrics, a
    per-company pie chart, a per-type bar chart, and the ten most recent
    stored analyses. Any exception raised while fetching or rendering the
    data is shown as an error on the page.

    Fixes relative to the previous version:
    - the response preview only gets a trailing "..." when the text was
      actually truncated;
    - when database mode is enabled but no client is available, the page
      reports a connection problem instead of claiming the mode is disabled.
    """
    st.header("Analytics Dashboard")

    db_client = get_db_client()
    if not db_client:
        if config.use_database:
            st.error(
                "Database mode is enabled, but the database connection could not be established."
            )
        else:
            st.warning(
                "Database mode is not enabled. Set USE_DATABASE=true in your .env file to enable analytics."
            )
        st.info(
            "Analytics features require storing analysis results in PostgreSQL for historical tracking."
        )
        return

    # Time range selector
    days = st.selectbox("Time Range", [7, 14, 30, 90], index=0)

    try:
        analytics = db_client.get_analytics(days=days)
        if not analytics:
            st.info("No analytics data available yet. Run some analyses first!")
            return

        # Summary metrics
        st.subheader("Summary")
        col1, col2, col3 = st.columns(3)
        with col1:
            total = analytics.get("total_messages", 0)
            st.metric("Total Analyses", total)
        with col2:
            companies = len(analytics.get("by_company", {}))
            st.metric("Companies Analyzed", companies)
        with col3:
            types = len(analytics.get("by_type", {}))
            st.metric("Analysis Types", types)

        # Charts
        col1, col2 = st.columns(2)
        with col1:
            by_company = analytics.get("by_company", {})
            if by_company:
                df = pd.DataFrame(
                    [{"Company": k, "Count": v} for k, v in by_company.items()]
                )
                fig = px.pie(
                    df, values="Count", names="Company", title="Analyses by Company"
                )
                st.plotly_chart(fig, use_container_width=True)
        with col2:
            by_type = analytics.get("by_type", {})
            if by_type:
                df = pd.DataFrame(
                    [{"Type": k, "Count": v} for k, v in by_type.items()]
                )
                fig = px.bar(df, x="Type", y="Count", title="Analyses by Type")
                st.plotly_chart(fig, use_container_width=True)

        # Recent messages
        st.subheader("Recent Analyses")
        messages = db_client.get_messages(limit=10)
        if messages:
            for msg in messages:
                with st.expander(
                    f"{msg.get('company_name', 'Unknown')} - {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
                ):
                    st.markdown(f"**Model:** {msg.get('model', 'N/A')}")
                    if msg.get("response"):
                        st.markdown(_response_preview(msg["response"]))
    except Exception as e:
        # UI boundary: surface the failure on the page instead of crashing
        # the whole dashboard run.
        st.error(f"Error fetching analytics: {e}")
def render_about():
    """Render the About page: project description plus a system status panel.

    The status panel reports whether a database client is available and
    whether the analyzer can be obtained.

    Fixes relative to the previous version:
    - a failure while constructing the analyzer is caught and reported as
      "not initialized"; previously the exception escaped and the error
      branch could not be reached that way;
    - when database mode is enabled but no client is available, the panel
      reports a failed connection instead of "Not configured".
    """
    st.header("About SPARC")
    st.markdown(
        """
**SPARC** (Semiconductor Patent & Analytics Report Core) is a patent analysis
system that estimates company performance by analyzing their patent portfolios
using LLM-powered insights.
### Features
- **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
- **Intelligent Parsing**: Extracts key sections from patent PDFs
- **AI Analysis**: Uses Claude 3.5 Sonnet for deep analysis
- **Batch Processing**: Analyze multiple companies concurrently
- **REST API**: FastAPI web service for integration
- **Analytics**: Track and visualize analysis history
### Technology Stack
- **Backend**: Python, FastAPI
- **AI**: Claude 3.5 Sonnet via OpenRouter
- **Database**: PostgreSQL
- **Dashboard**: Streamlit, Plotly
- **Patent Data**: SerpAPI Google Patents
### Links
- API Docs: `http://localhost:8000/docs`
- Health Check: `http://localhost:8000/health`
"""
    )

    # System status
    st.subheader("System Status")
    col1, col2 = st.columns(2)
    with col1:
        db_client = get_db_client()
        if db_client:
            st.success("Database: Connected")
        elif config.use_database:
            # Enabled but unavailable is a fault, not a missing setting.
            st.error("Database: Connection failed")
        else:
            st.warning("Database: Not configured")
    with col2:
        try:
            analyzer = get_analyzer()
        except Exception as exc:
            st.error(f"Analyzer: Not initialized ({exc})")
        else:
            if analyzer:
                st.success("Analyzer: Ready")
            else:
                st.error("Analyzer: Not initialized")
def main():
    """Dashboard entry point: draw the chrome, then the selected page.

    Renders the header and sidebar, then calls the render function that
    matches the sidebar selection. An unrecognized selection renders nothing
    further.
    """
    render_header()
    selected_page = render_sidebar()

    # Sidebar label -> function that draws that page.
    page_renderers = {
        "Company Analysis": render_company_analysis,
        "Batch Analysis": render_batch_analysis,
        "Analytics": render_analytics,
        "About": render_about,
    }
    renderer = page_renderers.get(selected_page)
    if renderer is not None:
        renderer()


if __name__ == "__main__":
    main()
+3
View File
@@ -9,3 +9,6 @@ psycopg2-binary
fastapi fastapi
uvicorn[standard] uvicorn[standard]
httpx httpx
streamlit
plotly
pandas