diff --git a/README.md b/README.md
index 33321ba..4a91ac4 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ SPARC automatically collects, parses, and analyzes patents from companies to pro
 - **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights
 - **Batch Processing**: Analyze multiple companies concurrently with progress tracking
 - **REST API**: FastAPI web service with async job support
+- **Dashboard**: Interactive Streamlit visualization dashboard
 - **Robust Testing**: 40 tests covering all major functionality
 
 ## Architecture
@@ -167,6 +168,22 @@ curl -X POST http://localhost:8000/analyze/batch/async \
   -d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}'
 ```
 
+### Visualization Dashboard
+
+Launch the interactive Streamlit dashboard:
+
+```bash
+streamlit run dashboard.py
+```
+
+Dashboard features:
+- **Company Analysis**: Analyze individual companies with real-time results
+- **Batch Analysis**: Process multiple companies with progress tracking and charts
+- **Analytics**: View historical analysis data and trends (requires database mode)
+- **System Status**: Monitor database and analyzer health (on the About page)
+
+The dashboard runs at `http://localhost:8501` by default.
+
 ## Running Tests
 
 ```bash
@@ -202,7 +219,7 @@ pytest tests/ --cov=SPARC --cov-report=term-missing
 - [X] FastAPI web service wrapper
 - [X] Docker containerization
 - [X] Results persistence (database)
-- [ ] Visualization dashboard
+- [X] Visualization dashboard
 
 ## Development
 
diff --git a/dashboard.py b/dashboard.py
new file mode 100644
index 0000000..1994a56
--- /dev/null
+++ b/dashboard.py
@@ -0,0 +1,426 @@
+"""SPARC Visualization Dashboard.
+
+A Streamlit-based dashboard for visualizing patent analysis results.
+Run with: streamlit run dashboard.py
+"""
+
+import logging
+from datetime import datetime, timedelta
+
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import streamlit as st
+
+from SPARC import config
+from SPARC.analyzer import CompanyAnalyzer
+from SPARC.database import DatabaseClient
+
+logger = logging.getLogger(__name__)
+
+# Longest stored response shown inline on the Analytics page.
+RESPONSE_PREVIEW_CHARS = 500
+
+
+st.set_page_config(
+    page_title="SPARC Dashboard",
+    page_icon="📊",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+
+
+@st.cache_resource
+def get_analyzer():
+    """Get or create the CompanyAnalyzer instance.
+
+    Cached with ``st.cache_resource`` so reruns reuse one analyzer.
+    """
+    return CompanyAnalyzer()
+
+
+@st.cache_resource
+def _connect_db():
+    """Create and connect the DatabaseClient shared across reruns.
+
+    Exceptions propagate to the caller. ``st.cache_resource`` stores only
+    returned values, so a failed attempt is retried on the next call instead
+    of being remembered as "no database".
+    """
+    client = DatabaseClient()
+    client.connect()
+    return client
+
+
+def get_db_client():
+    """Get database client if available.
+
+    Returns:
+        The connected client, or ``None`` when ``config.use_database`` is
+        falsy or the connection attempt failed.
+    """
+    if not config.use_database:
+        return None
+    try:
+        return _connect_db()
+    except Exception:
+        # Best effort: the analysis pages do not need the database, so log the
+        # failure and carry on rather than crash the dashboard.
+        logger.exception("Could not connect to the SPARC database")
+        return None
+
+
+def _parse_company_names(raw):
+    """Split comma- or newline-separated text into non-empty, stripped names."""
+    names = (part.strip() for part in raw.replace(",", "\n").split("\n"))
+    return [name for name in names if name]
+
+
+def _preview(text, limit=RESPONSE_PREVIEW_CHARS):
+    """Return ``text`` cut to ``limit`` characters, marking a cut with "..."."""
+    if len(text) <= limit:
+        return text
+    return text[:limit] + "..."
+
+
+def render_header():
+    """Render the dashboard header."""
+    st.title("SPARC Dashboard")
+    st.markdown("**Semiconductor Patent & Analytics Report Core**")
+    st.markdown("---")
+
+
+def render_sidebar():
+    """Render the sidebar navigation and return the selected page name."""
+    st.sidebar.title("Navigation")
+    page = st.sidebar.radio(
+        "Select Page",
+        ["Company Analysis", "Batch Analysis", "Analytics", "About"],
+    )
+    return page
+
+
+def render_company_analysis():
+    """Render single company analysis page."""
+    st.header("Company Patent Analysis")
+
+    col1, col2 = st.columns([2, 1])
+
+    with col1:
+        company_name = st.text_input(
+            "Company Name",
+            placeholder="e.g., nvidia, intel, amd",
+            help="Enter the company name to analyze their patent portfolio",
+        )
+
+    with col2:
+        analyze_btn = st.button("Analyze", type="primary", use_container_width=True)
+
+    if not analyze_btn:
+        return
+
+    # Surrounding whitespace is never part of a company name.
+    company_name = company_name.strip()
+    if not company_name:
+        st.warning("Please enter a company name")
+        return
+
+    with st.spinner(f"Analyzing {company_name}..."):
+        analyzer = get_analyzer()
+        # NOTE(review): this calls a private CompanyAnalyzer method; switch to
+        # a public entry point if the class offers one.
+        result = analyzer._analyze_company_safe(company_name)
+
+    if result.success:
+        st.success(f"Analysis complete for {company_name}")
+
+        # Metrics row
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.metric("Patents Analyzed", result.patent_count)
+        with col2:
+            st.metric("Status", "Success")
+        with col3:
+            st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))
+
+        # Analysis content
+        st.subheader("AI Analysis")
+        st.markdown(result.analysis)
+
+    else:
+        st.error(f"Analysis failed: {result.error}")
+
+
+def render_batch_analysis():
+    """Render batch analysis page."""
+    st.header("Batch Company Analysis")
+
+    st.markdown(
+        "Analyze multiple companies simultaneously. Enter company names separated by commas or newlines."
+    )
+
+    companies_input = st.text_area(
+        "Company Names",
+        placeholder="nvidia\namd\nintel\nqualcomm",
+        height=150,
+    )
+
+    col1, col2 = st.columns(2)
+    with col1:
+        max_workers = st.slider("Concurrent Workers", 1, 5, 3)
+    with col2:
+        analyze_btn = st.button(
+            "Run Batch Analysis", type="primary", use_container_width=True
+        )
+
+    if not analyze_btn:
+        return
+
+    # Validate on every click so an empty box gets the same warning as one
+    # holding only commas or whitespace.
+    companies = _parse_company_names(companies_input)
+    if not companies:
+        st.warning("Please enter at least one company name")
+        return
+
+    st.info(f"Starting analysis of {len(companies)} companies...")
+
+    # Progress tracking
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+
+    analyzer = get_analyzer()
+
+    def update_progress(company: str, completed: int, total: int):
+        # st.progress rejects floats outside 0.0-1.0, so guard total == 0 and clamp.
+        fraction = completed / total if total else 1.0
+        progress_bar.progress(min(max(fraction, 0.0), 1.0))
+        status_text.text(f"Analyzing {company}... ({completed}/{total})")
+
+    # NOTE(review): if analyze_companies invokes the callback from its worker
+    # threads, confirm those Streamlit calls reach this session.
+    result = analyzer.analyze_companies(
+        companies=companies,
+        max_workers=max_workers,
+        progress_callback=update_progress,
+    )
+
+    progress_bar.progress(1.0)
+    status_text.text("Analysis complete!")
+
+    # Summary metrics
+    st.subheader("Results Summary")
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("Total Companies", result.total_companies)
+    with col2:
+        st.metric("Successful", result.successful)
+    with col3:
+        st.metric("Failed", result.failed)
+    with col4:
+        success_rate = (
+            (result.successful / result.total_companies * 100)
+            if result.total_companies > 0
+            else 0
+        )
+        st.metric("Success Rate", f"{success_rate:.1f}%")
+
+    # Results chart
+    if result.results:
+        df = pd.DataFrame(
+            [
+                {
+                    "Company": r.company_name,
+                    "Patents": r.patent_count,
+                    "Status": "Success" if r.success else "Failed",
+                }
+                for r in result.results
+            ]
+        )
+
+        fig = px.bar(
+            df,
+            x="Company",
+            y="Patents",
+            color="Status",
+            color_discrete_map={"Success": "#28a745", "Failed": "#dc3545"},
+            title="Patents per Company",
+        )
+        st.plotly_chart(fig, use_container_width=True)
+
+    # Individual results
+    st.subheader("Individual Results")
+    for r in result.results:
+        with st.expander(
+            f"{'✓' if r.success else '✗'} {r.company_name} ({r.patent_count} patents)"
+        ):
+            if r.success:
+                st.markdown(r.analysis)
+            else:
+                st.error(r.error)
+
+
+def render_analytics():
+    """Render analytics page with database insights."""
+    st.header("Analytics Dashboard")
+
+    db_client = get_db_client()
+
+    if not db_client:
+        if config.use_database:
+            # Enabled but unreachable is a different problem from "not enabled".
+            st.error(
+                "Database mode is enabled, but the connection failed. Check the database settings and logs."
+            )
+            return
+        st.warning(
+            "Database mode is not enabled. Set USE_DATABASE=true in your .env file to enable analytics."
+        )
+        st.info(
+            "Analytics features require storing analysis results in PostgreSQL for historical tracking."
+        )
+        return
+
+    # Time range selector
+    days = st.selectbox("Time Range", [7, 14, 30, 90], index=0)
+
+    try:
+        analytics = db_client.get_analytics(days=days)
+
+        if not analytics:
+            st.info("No analytics data available yet. Run some analyses first!")
+            return
+
+        # Summary metrics
+        st.subheader("Summary")
+        col1, col2, col3 = st.columns(3)
+
+        with col1:
+            total = analytics.get("total_messages", 0)
+            st.metric("Total Analyses", total)
+
+        with col2:
+            companies = len(analytics.get("by_company", {}))
+            st.metric("Companies Analyzed", companies)
+
+        with col3:
+            types = len(analytics.get("by_type", {}))
+            st.metric("Analysis Types", types)
+
+        # Charts
+        col1, col2 = st.columns(2)
+
+        with col1:
+            by_company = analytics.get("by_company", {})
+            if by_company:
+                df = pd.DataFrame(
+                    [{"Company": k, "Count": v} for k, v in by_company.items()]
+                )
+                fig = px.pie(
+                    df, values="Count", names="Company", title="Analyses by Company"
+                )
+                st.plotly_chart(fig, use_container_width=True)
+
+        with col2:
+            by_type = analytics.get("by_type", {})
+            if by_type:
+                df = pd.DataFrame(
+                    [{"Type": k, "Count": v} for k, v in by_type.items()]
+                )
+                fig = px.bar(df, x="Type", y="Count", title="Analyses by Type")
+                st.plotly_chart(fig, use_container_width=True)
+
+        # Recent messages
+        st.subheader("Recent Analyses")
+        messages = db_client.get_messages(limit=10)
+
+        if messages:
+            for msg in messages:
+                with st.expander(
+                    f"{msg.get('company_name', 'Unknown')} - {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})"
+                ):
+                    st.markdown(f"**Model:** {msg.get('model', 'N/A')}")
+                    if msg.get("response"):
+                        # Only add the ellipsis when text was actually cut.
+                        st.markdown(_preview(msg["response"]))
+
+    except Exception as e:
+        st.error(f"Error fetching analytics: {e}")
+
+
+def render_about():
+    """Render about page."""
+    st.header("About SPARC")
+
+    st.markdown(
+        """
+    **SPARC** (Semiconductor Patent & Analytics Report Core) is a patent analysis
+    system that estimates company performance by analyzing their patent portfolios
+    using LLM-powered insights.
+
+    ### Features
+
+    - **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
+    - **Intelligent Parsing**: Extracts key sections from patent PDFs
+    - **AI Analysis**: Uses Claude 3.5 Sonnet for deep analysis
+    - **Batch Processing**: Analyze multiple companies concurrently
+    - **REST API**: FastAPI web service for integration
+    - **Analytics**: Track and visualize analysis history
+
+    ### Technology Stack
+
+    - **Backend**: Python, FastAPI
+    - **AI**: Claude 3.5 Sonnet via OpenRouter
+    - **Database**: PostgreSQL
+    - **Dashboard**: Streamlit, Plotly
+    - **Patent Data**: SerpAPI Google Patents
+
+    ### Links
+
+    - API Docs: `http://localhost:8000/docs`
+    - Health Check: `http://localhost:8000/health`
+    """
+    )
+
+    # System status
+    st.subheader("System Status")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        db_client = get_db_client()
+        if db_client:
+            st.success("Database: Connected")
+        elif config.use_database:
+            st.error("Database: Connection failed")
+        else:
+            st.warning("Database: Not configured")
+
+    with col2:
+        # get_analyzer() either returns an instance or raises, so report the
+        # exception instead of testing the instance for truthiness.
+        try:
+            get_analyzer()
+        except Exception as exc:
+            st.error(f"Analyzer: Not initialized ({exc})")
+        else:
+            st.success("Analyzer: Ready")
+
+
+def main():
+    """Main dashboard entry point."""
+    render_header()
+    page = render_sidebar()
+
+    if page == "Company Analysis":
+        render_company_analysis()
+    elif page == "Batch Analysis":
+        render_batch_analysis()
+    elif page == "Analytics":
+        render_analytics()
+    elif page == "About":
+        render_about()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index c081cb6..edab150 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,6 @@ psycopg2-binary
 fastapi
 uvicorn[standard]
 httpx
+streamlit
+plotly
+pandas