feat: add Streamlit visualization dashboard

- Create interactive dashboard with company analysis page - Add batch analysis with progress tracking and charts - Include analytics page for historical data visualization - Add system status monitoring - Update requirements.txt with streamlit, plotly, pandas - Document dashboard usage in README 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-12 23:28:30 -04:00
parent 3479ba8a46
commit d4ba13846a
3 changed files with 383 additions and 1 deletions
@@ -17,6 +17,7 @@ SPARC automatically collects, parses, and analyzes patents from companies to pro
 - **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights
 - **Batch Processing**: Analyze multiple companies concurrently with progress tracking
 - **REST API**: FastAPI web service with async job support
 - **Dashboard**: Interactive Streamlit visualization dashboard
 - **Robust Testing**: 40 tests covering all major functionality
 ## Architecture
@@ -167,6 +168,22 @@ curl -X POST http://localhost:8000/analyze/batch/async \
  -d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}'
 ```
 ### Visualization Dashboard
 Launch the interactive Streamlit dashboard:
 ```bash
 streamlit run dashboard.py
 ```
 Dashboard features:
 - **Company Analysis**: Analyze individual companies with real-time results
 - **Batch Analysis**: Process multiple companies with progress tracking and charts
 - **Analytics**: View historical analysis data and trends (requires database mode)
 - **System Status**: Monitor database and analyzer health (shown on the About page)
 The dashboard runs at `http://localhost:8501` by default.
 ## Running Tests
 ```bash
@@ -202,7 +219,7 @@ pytest tests/ --cov=SPARC --cov-report=term-missing
 - [X] FastAPI web service wrapper
 - [X] Docker containerization
 - [X] Results persistence (database)
- [ ] Visualization dashboard
+- [X] Visualization dashboard
 ## Development
@@ -0,0 +1,362 @@
 """SPARC Visualization Dashboard.
 A Streamlit-based dashboard for visualizing patent analysis results.
 Run with: streamlit run dashboard.py
 """
 import streamlit as st
 import plotly.express as px
 import plotly.graph_objects as go
 import pandas as pd
 from datetime import datetime, timedelta
 from SPARC.analyzer import CompanyAnalyzer
 from SPARC.database import DatabaseClient
 from SPARC import config
 # Page-wide settings; this call precedes every other Streamlit call in the module.
 st.set_page_config(
     layout="wide",
     initial_sidebar_state="expanded",
     page_title="SPARC Dashboard",
     page_icon="📊",
 )
@st.cache_resource
def get_analyzer():
    """Return the dashboard's CompanyAnalyzer instance.

    The function is wrapped in ``st.cache_resource``; the body itself simply
    constructs a ``CompanyAnalyzer`` with default arguments.
    """
    analyzer = CompanyAnalyzer()
    return analyzer
@st.cache_resource
def get_db_client():
    """Return a connected DatabaseClient, or None when none is available.

    Returns:
        A ``DatabaseClient`` on which ``connect()`` has been called when
        ``config.use_database`` is truthy and construction/connection
        succeeds; ``None`` when database mode is off or when creating or
        connecting the client raised an exception.
    """
    if not config.use_database:
        return None

    # Function-scope import so this block does not depend on a file-level
    # ``import logging``.
    import logging

    try:
        client = DatabaseClient()
        client.connect()
    except Exception:
        # Deliberately best-effort: the dashboard still works without a
        # database. The failure is logged rather than silently discarded so
        # that a misconfiguration can be diagnosed.
        # NOTE(review): because this function is wrapped in
        # st.cache_resource, the None returned here is presumably cached too,
        # so a transient failure may persist until the cache is cleared --
        # confirm against the Streamlit caching docs.
        logging.getLogger(__name__).exception(
            "Database connection failed; database-backed features are unavailable"
        )
        return None
    return client
 def render_header():
     """Draw the page title, the project tagline, and a horizontal rule."""
     st.title("SPARC Dashboard")
     # The tagline and the separator are both plain markdown fragments.
     for fragment in ("**Semiconductor Patent & Analytics Report Core**", "---"):
         st.markdown(fragment)
 def render_sidebar():
     """Draw the sidebar navigation and report the selected page.

     Returns:
         The value returned by the sidebar radio group, i.e. the label of the
         page the user picked.
     """
     sidebar = st.sidebar
     sidebar.title("Navigation")
     page_names = ["Company Analysis", "Batch Analysis", "Analytics", "About"]
     return sidebar.radio("Select Page", page_names)
 def render_company_analysis():
     """Render the single-company analysis page.

     Shows a company-name field and an "Analyze" button. When the button is
     pressed with a non-blank name, the analyzer from ``get_analyzer()`` is
     run on that name and the page shows either the result metrics and
     analysis text, or the reported error. Pressing the button with a blank
     name shows a warning instead of doing nothing.
     """
     st.header("Company Patent Analysis")

     input_col, button_col = st.columns([2, 1])
     with input_col:
         company_name = st.text_input(
             "Company Name",
             placeholder="e.g., nvidia, intel, amd",
             help="Enter the company name to analyze their patent portfolio",
         )
     with button_col:
         analyze_btn = st.button("Analyze", type="primary", use_container_width=True)

     if not analyze_btn:
         return

     # Surrounding whitespace is not part of the name, and a whitespace-only
     # entry must not be sent to the analyzer.
     company_name = (company_name or "").strip()
     if not company_name:
         st.warning("Please enter a company name")
         return

     with st.spinner(f"Analyzing {company_name}..."):
         analyzer = get_analyzer()
         # NOTE(review): this calls a leading-underscore method of
         # CompanyAnalyzer; prefer a public entry point if one exists.
         result = analyzer._analyze_company_safe(company_name)

     if not result.success:
         st.error(f"Analysis failed: {result.error}")
         return

     st.success(f"Analysis complete for {company_name}")

     # Metrics row
     count_col, status_col, time_col = st.columns(3)
     with count_col:
         st.metric("Patents Analyzed", result.patent_count)
     with status_col:
         st.metric("Status", "Success")
     with time_col:
         st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))

     # Analysis content
     st.subheader("AI Analysis")
     st.markdown(result.analysis)
 def render_batch_analysis():
     """Render the batch analysis page.

     Reads company names from a text area (comma- or newline-separated), runs
     them through ``analyzer.analyze_companies`` with the selected number of
     workers while updating a progress bar, and then shows summary metrics, a
     bar chart of patent counts, and one expander per company. Pressing the
     button without any usable name shows a warning.
     """
     st.header("Batch Company Analysis")
     st.markdown(
         "Analyze multiple companies simultaneously. Enter company names separated by commas or newlines."
     )

     companies_input = st.text_area(
         "Company Names",
         placeholder="nvidia\namd\nintel\nqualcomm",
         height=150,
     )

     col1, col2 = st.columns(2)
     with col1:
         max_workers = st.slider("Concurrent Workers", 1, 5, 3)
     with col2:
         analyze_btn = st.button(
             "Run Batch Analysis", type="primary", use_container_width=True
         )

     if not analyze_btn:
         return

     # Commas and newlines are both separators; blank entries are dropped.
     # Parsing happens before the emptiness check so that an empty text area
     # also reaches the warning below.
     companies = [
         name.strip()
         for name in (companies_input or "").replace(",", "\n").split("\n")
         if name.strip()
     ]
     if not companies:
         st.warning("Please enter at least one company name")
         return

     st.info(f"Starting analysis of {len(companies)} companies...")

     # Progress tracking
     progress_bar = st.progress(0)
     status_text = st.empty()
     analyzer = get_analyzer()

     def update_progress(company: str, completed: int, total: int):
         """Update the progress bar and status line for one progress event."""
         # Guard against a zero total and keep the fraction inside 0.0-1.0,
         # the float range st.progress is documented to accept.
         fraction = completed / total if total > 0 else 0.0
         progress_bar.progress(min(max(fraction, 0.0), 1.0))
         status_text.text(f"Analyzing {company}... ({completed}/{total})")

     result = analyzer.analyze_companies(
         companies=companies,
         max_workers=max_workers,
         progress_callback=update_progress,
     )
     progress_bar.progress(1.0)
     status_text.text("Analysis complete!")

     # Summary metrics
     st.subheader("Results Summary")
     col1, col2, col3, col4 = st.columns(4)
     with col1:
         st.metric("Total Companies", result.total_companies)
     with col2:
         st.metric("Successful", result.successful)
     with col3:
         st.metric("Failed", result.failed)
     with col4:
         success_rate = (
             (result.successful / result.total_companies * 100)
             if result.total_companies > 0
             else 0
         )
         st.metric("Success Rate", f"{success_rate:.1f}%")

     # Results chart
     if result.results:
         df = pd.DataFrame(
             [
                 {
                     "Company": r.company_name,
                     "Patents": r.patent_count,
                     "Status": "Success" if r.success else "Failed",
                 }
                 for r in result.results
             ]
         )
         fig = px.bar(
             df,
             x="Company",
             y="Patents",
             color="Status",
             color_discrete_map={"Success": "#28a745", "Failed": "#dc3545"},
             title="Patents per Company",
         )
         st.plotly_chart(fig, use_container_width=True)

     # Individual results
     st.subheader("Individual Results")
     for r in result.results:
         with st.expander(
             f"{'✓' if r.success else '✗'} {r.company_name} ({r.patent_count} patents)"
         ):
             if r.success:
                 st.markdown(r.analysis)
             else:
                 st.error(r.error)
 def render_analytics():
     """Render the analytics page from data returned by the database client.

     Without a database client the page explains why and stops. It separates
     "database mode is off" from "database mode is on but no client could be
     obtained". Otherwise it shows summary metrics, a pie chart by company, a
     bar chart by type, and previews of the ten most recent messages. Any
     exception raised while fetching or rendering is shown as an error.
     """
     st.header("Analytics Dashboard")

     db_client = get_db_client()
     if not db_client:
         if config.use_database:
             # get_db_client() returns None with database mode on only when
             # creating or connecting the client raised.
             st.error(
                 "Database mode is enabled, but the database client could not be created or connected."
             )
             return
         st.warning(
             "Database mode is not enabled. Set USE_DATABASE=true in your .env file to enable analytics."
         )
         st.info(
             "Analytics features require storing analysis results in PostgreSQL for historical tracking."
         )
         return

     # Time range selector
     days = st.selectbox("Time Range", [7, 14, 30, 90], index=0)

     # Longest response preview shown per message before truncation.
     preview_limit = 500

     try:
         analytics = db_client.get_analytics(days=days)
         if not analytics:
             st.info("No analytics data available yet. Run some analyses first!")
             return

         # Summary metrics
         st.subheader("Summary")
         col1, col2, col3 = st.columns(3)
         with col1:
             total = analytics.get("total_messages", 0)
             st.metric("Total Analyses", total)
         with col2:
             companies = len(analytics.get("by_company", {}))
             st.metric("Companies Analyzed", companies)
         with col3:
             types = len(analytics.get("by_type", {}))
             st.metric("Analysis Types", types)

         # Charts
         col1, col2 = st.columns(2)
         with col1:
             by_company = analytics.get("by_company", {})
             if by_company:
                 df = pd.DataFrame(
                     [{"Company": k, "Count": v} for k, v in by_company.items()]
                 )
                 fig = px.pie(
                     df, values="Count", names="Company", title="Analyses by Company"
                 )
                 st.plotly_chart(fig, use_container_width=True)
         with col2:
             by_type = analytics.get("by_type", {})
             if by_type:
                 df = pd.DataFrame(
                     [{"Type": k, "Count": v} for k, v in by_type.items()]
                 )
                 fig = px.bar(df, x="Type", y="Count", title="Analyses by Type")
                 st.plotly_chart(fig, use_container_width=True)

         # Recent messages
         st.subheader("Recent Analyses")
         messages = db_client.get_messages(limit=10)
         if messages:
             for msg in messages:
                 with st.expander(
                     f"{msg.get('company_name', 'Unknown')} - {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
                 ):
                     st.markdown(f"**Model:** {msg.get('model', 'N/A')}")
                     response = msg.get("response")
                     if response:
                         # Add the ellipsis only when text was actually cut.
                         if len(response) > preview_limit:
                             response = response[:preview_limit] + "..."
                         st.markdown(response)
     except Exception as e:
         # Page-level boundary: show the failure instead of crashing the app.
         st.error(f"Error fetching analytics: {e}")
 def render_about():
     """Render the About page: project description plus a system status panel.

     The status panel reports the database as connected, failed (database
     mode on but no client), or not configured (database mode off). It
     reports the analyzer as ready or not initialized. An exception raised
     while building the analyzer is shown as "not initialized" rather than
     aborting the page.
     """
     st.header("About SPARC")
     st.markdown(
         """
    **SPARC** (Semiconductor Patent & Analytics Report Core) is a patent analysis
    system that estimates company performance by analyzing their patent portfolios
    using LLM-powered insights.
    ### Features
    - **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
    - **Intelligent Parsing**: Extracts key sections from patent PDFs
    - **AI Analysis**: Uses Claude 3.5 Sonnet for deep analysis
    - **Batch Processing**: Analyze multiple companies concurrently
    - **REST API**: FastAPI web service for integration
    - **Analytics**: Track and visualize analysis history
    ### Technology Stack
    - **Backend**: Python, FastAPI
    - **AI**: Claude 3.5 Sonnet via OpenRouter
    - **Database**: PostgreSQL
    - **Dashboard**: Streamlit, Plotly
    - **Patent Data**: SerpAPI Google Patents
    ### Links
    - API Docs: `http://localhost:8000/docs`
    - Health Check: `http://localhost:8000/health`
    """
     )

     # System status
     st.subheader("System Status")
     col1, col2 = st.columns(2)
     with col1:
         db_client = get_db_client()
         if db_client:
             st.success("Database: Connected")
         elif config.use_database:
             # Database mode is on, yet no client came back: creating or
             # connecting it failed.
             st.error("Database: Connection failed")
         else:
             st.warning("Database: Not configured")
     with col2:
         try:
             analyzer = get_analyzer()
         except Exception as exc:
             # A constructor failure would otherwise propagate and make the
             # "Not initialized" status unreachable.
             st.error(f"Analyzer: Not initialized ({exc})")
         else:
             if analyzer:
                 st.success("Analyzer: Ready")
             else:
                 st.error("Analyzer: Not initialized")
 def main():
     """Draw the header and sidebar, then render the page picked in the sidebar."""
     render_header()
     selected = render_sidebar()

     # Sidebar label -> renderer; a label not listed here renders nothing.
     renderers = {
         "Company Analysis": render_company_analysis,
         "Batch Analysis": render_batch_analysis,
         "Analytics": render_analytics,
         "About": render_about,
     }
     render_page = renderers.get(selected)
     if render_page is not None:
         render_page()
 # Start the dashboard only when this file is executed as the main module.
 if __name__ == "__main__":
    main()
@@ -9,3 +9,6 @@ psycopg2-binary
 fastapi
 uvicorn[standard]
 httpx
 streamlit
 plotly
 pandas