Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from wordcloud import WordCloud | |
import matplotlib.pyplot as plt | |
import asyncio | |
import json | |
import base64 | |
from datetime import datetime | |
import io | |
import os | |
# Import our modules | |
from api import NewsAnalyzer | |
from utils import load_config, cache_results | |
from report import generate_pdf_report | |
# Page-level settings; issued before any other Streamlit call in this script.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Global Business News Intelligence Dashboard",
    page_icon="π",
)

# Stylesheet for the header, metric cards, sentiment labels and audio box.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
text-align: center;
color: #2E86AB;
margin-bottom: 2rem;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 10px;
border-left: 4px solid #2E86AB;
}
.sentiment-positive { color: #28a745; font-weight: bold; }
.sentiment-negative { color: #dc3545; font-weight: bold; }
.sentiment-neutral { color: #6c757d; font-weight: bold; }
.audio-container {
background-color: #f8f9fa;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Session state: the analyzer is only constructed when the key is missing,
# so an existing instance is reused on later script runs.
if 'analyzer' not in st.session_state:
    st.session_state['analyzer'] = NewsAnalyzer()

# Remaining keys get plain default values when they are not present yet.
for _key, _default in (
    ('results', None),
    ('analysis_complete', False),
    ('query', ''),
    ('progress', 0),
    ('progress_bar', None),
    ('status_text', None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
del _key, _default
def main():
    """Render the page: header, sidebar controls, analysis run, and results.

    The sidebar collects the query and the analysis options. When the analyze
    button is pressed with a non-empty query, the options are passed to
    ``st.session_state.analyzer.analyze_news`` and its return value is stored
    in ``st.session_state.results``. Afterwards the page shows the stored
    results, a hint to start the analysis, or the demo dashboard.
    """
    # Header
    st.markdown('<h1 class="main-header">π Global Business News Intelligence Dashboard</h1>', unsafe_allow_html=True)
    st.markdown("**Real-time sentiment analysis, multilingual summaries, and audio insights for business intelligence**")

    # Sidebar
    with st.sidebar:
        st.header("βοΈ Configuration")

        # Input section
        st.subheader("π― Target Analysis")
        query_type = st.selectbox("Query Type", ["Company", "Stock Ticker", "Keyword", "Industry"])
        query = st.text_input(f"Enter {query_type}:", placeholder="e.g., Tesla, TSLA, AI technology")

        st.subheader("π Analysis Settings")
        num_articles = st.slider("Number of Articles", 5, 50, 20)
        languages = st.multiselect(
            "Summary Languages",
            ["English", "Hindi", "Tamil"],
            default=["English"]
        )
        include_audio = st.checkbox("Generate Audio Summaries", True)

        st.subheader("π§ Model Settings")
        sentiment_models = st.multiselect(
            "Sentiment Models",
            ["VADER", "Loughran-McDonald", "FinBERT"],
            default=["VADER", "Loughran-McDonald", "FinBERT"]
        )

        # Analysis button
        analyze_button = st.button("π Analyze News", type="primary", use_container_width=True)

    # Main content area
    if analyze_button and query:
        st.session_state.analysis_complete = False
        with st.spinner("π Analyzing news articles... This may take a few minutes."):
            try:
                # Create both placeholders BEFORE publishing them: the status
                # placeholder used to be read one line before it was assigned,
                # which raised UnboundLocalError and aborted every analysis.
                progress_bar = st.progress(0)
                status_text = st.empty()

                # Store UI handles in session state for the progress callback
                st.session_state.progress_bar = progress_bar
                st.session_state.status_text = status_text

                # Run analysis
                config = {
                    'query': query,
                    'num_articles': num_articles,
                    'languages': languages,
                    'include_audio': include_audio,
                    'sentiment_models': sentiment_models
                }

                # Update progress
                status_text.text("π Scraping articles...")
                progress_bar.progress(20)

                results = st.session_state.analyzer.analyze_news(config, progress_callback=update_progress)
                st.session_state.results = results
                st.session_state.analysis_complete = True

                progress_bar.progress(100)
                status_text.text("β Analysis complete!")
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing the page.
                st.error(f"Error during analysis: {str(e)}")
                st.session_state.analysis_complete = False
            finally:
                # The placeholders are only needed while the analysis runs;
                # drop the references so nothing keeps updating them afterwards.
                st.session_state.progress_bar = None
                st.session_state.status_text = None

    # Display results
    if st.session_state.analysis_complete and st.session_state.results:
        display_results(st.session_state.results)
    elif not st.session_state.analysis_complete and query:
        st.info("π Click 'Analyze News' to start the analysis")
    else:
        show_demo_dashboard()
def update_progress(progress, status):
    """Best-effort progress callback handed to the analyzer.

    Stores ``progress`` in ``st.session_state.progress`` and, when the UI
    handles are present in session state, moves the progress bar (value
    clamped to 0..100 and truncated to int) and replaces the status text.

    Args:
        progress: Numeric progress value; clamped to the range 0..100.
        status: Text to show in the status placeholder.

    Never raises: a failed UI update must not abort the analysis, so any
    exception is logged at debug level and otherwise ignored.
    """
    import logging  # function-scope import keeps the file's import block untouched

    try:
        st.session_state.progress = progress

        bar = st.session_state.progress_bar
        if bar is not None:
            bar.progress(int(max(0, min(100, progress))))

        status_box = st.session_state.status_text
        if status_box is not None:
            status_box.text(status)
    except Exception:
        # Deliberately non-fatal, but leave a trace instead of a silent pass.
        logging.getLogger(__name__).debug("Progress update skipped", exc_info=True)
def display_results(results):
    """Display analysis results with interactive dashboard.

    Shows a header with the query, four headline metrics (article count,
    average sentiment, distinct sources, language count) and one tab per
    detail view.

    Args:
        results: Mapping read here via the keys ``'query'``, ``'articles'``,
            ``'summary'`` (with ``'average_sentiment'``) and, optionally,
            ``'languages'``.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    st.header(f"π Analysis Results for: {results['query']}")

    articles = results['articles']
    avg_sentiment = results['summary']['average_sentiment']

    # Key metrics, one per column. (A CSS class name used to be computed for
    # the average sentiment but was never rendered; that dead local is gone.)
    metrics = [
        ("Articles Analyzed", len(articles)),
        ("Average Sentiment", f"{avg_sentiment:.3f}"),
        ("Sources", len({article['source'] for article in articles})),
        ("Languages", len(results.get('languages', ['English']))),
    ]
    for column, (label, value) in zip(st.columns(4), metrics):
        with column:
            st.markdown('<div class="metric-card">', unsafe_allow_html=True)
            st.metric(label, value)
            st.markdown('</div>', unsafe_allow_html=True)

    # Tabs for different views, paired with the function that fills each one.
    tab_labels = ["π Dashboard", "π° Articles", "π― Sentiment", "π£οΈ Audio", "π€ Export", "π API"]
    tab_renderers = [
        display_dashboard,
        display_articles,
        display_sentiment_analysis,
        display_audio_summaries,
        display_export_options,
        display_api_info,
    ]
    for tab, render in zip(st.tabs(tab_labels), tab_renderers):
        with tab:
            render(results)
def display_dashboard(results):
    """Display main dashboard with charts.

    Renders a sentiment pie chart, a per-source bar chart, a sentiment
    timeline for the articles that carry a ``'date'`` key, and a word cloud
    plus top-10 list when ``results['keywords']`` is present and non-empty.

    Args:
        results: Mapping with an ``'articles'`` list; each article is read via
            ``['sentiment']['compound']``, ``['source']``, ``['title']`` and
            optionally ``['date']``. ``'keywords'`` entries are read via
            ``['keyword']`` and ``['score']``.
    """
    articles = results['articles']
    col1, col2 = st.columns(2)

    with col1:
        # Sentiment distribution
        st.subheader("π Sentiment Distribution")
        sentiment_counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0}
        for article in articles:
            compound = article['sentiment']['compound']
            if compound > 0.1:
                sentiment_counts['Positive'] += 1
            elif compound < -0.1:
                sentiment_counts['Negative'] += 1
            elif -0.1 <= compound <= 0.1:
                sentiment_counts['Neutral'] += 1

        sentiment_labels = list(sentiment_counts)
        fig_pie = px.pie(
            values=list(sentiment_counts.values()),
            names=sentiment_labels,
            # color_discrete_map is keyed on the values passed as `color`;
            # without it the green/red/gray mapping was never applied.
            color=sentiment_labels,
            color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'}
        )
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        # Source distribution
        st.subheader("π° Source Distribution")
        source_counts = {}
        for article in articles:
            source = article['source']
            source_counts[source] = source_counts.get(source, 0) + 1

        fig_bar = px.bar(
            x=list(source_counts.keys()),
            y=list(source_counts.values()),
            color=list(source_counts.values()),
            color_continuous_scale="viridis"
        )
        fig_bar.update_layout(xaxis_title="Source", yaxis_title="Article Count")
        st.plotly_chart(fig_bar, use_container_width=True)

    # Timeline chart
    st.subheader("π Sentiment Over Time")
    # Only dated articles can be placed on the time axis.
    timeline_rows = [
        {
            'date': article['date'],
            'sentiment': article['sentiment']['compound'],
            'title': article['title'][:50] + "..." if len(article['title']) > 50 else article['title']
        }
        for article in articles
        if 'date' in article
    ]
    if timeline_rows:
        df_timeline = pd.DataFrame(timeline_rows)
        fig_timeline = px.scatter(
            df_timeline,
            x='date',
            y='sentiment',
            hover_data=['title'],
            color='sentiment',
            color_continuous_scale=['red', 'gray', 'green'],
            color_continuous_midpoint=0
        )
        fig_timeline.update_layout(
            xaxis_title="Date",
            yaxis_title="Sentiment Score",
            yaxis=dict(range=[-1, 1])
        )
        st.plotly_chart(fig_timeline, use_container_width=True)

    # Keywords word cloud
    st.subheader("π€ Key Topics")
    if 'keywords' in results and results['keywords']:
        col1, col2 = st.columns([2, 1])

        with col1:
            # Create word cloud
            keywords_text = ' '.join([kw['keyword'] for kw in results['keywords'][:50]])
            if keywords_text:
                wordcloud = WordCloud(
                    width=800,
                    height=400,
                    background_color='white',
                    colormap='viridis'
                ).generate(keywords_text)

                fig, ax = plt.subplots(figsize=(10, 5))
                try:
                    ax.imshow(wordcloud, interpolation='bilinear')
                    ax.axis('off')
                    st.pyplot(fig)
                finally:
                    # pyplot keeps every figure registered until it is closed;
                    # release it so figures do not pile up across reruns.
                    plt.close(fig)

        with col2:
            st.write("**Top Keywords:**")
            for i, kw in enumerate(results['keywords'][:10]):
                st.write(f"{i+1}. {kw['keyword']} ({kw['score']:.3f})")
def display_articles(results):
    """Render one expander per article with metadata, sentiment and summaries.

    The first three expanders start opened. Inside each one, the left column
    shows source, optional date, URL and the overall sentiment; the right
    column lists whichever per-model scores the article carries. Summaries
    follow below the columns.
    """
    articles = results['articles']
    st.subheader(f"π° Articles ({len(articles)})")

    # (key in the sentiment dict, label shown to the user)
    model_labels = (
        ('vader', 'VADER'),
        ('loughran_mcdonald', 'L&M'),
        ('finbert', 'FinBERT'),
    )

    for position, article in enumerate(articles):
        opened = position < 3
        with st.expander(f"π {article['title']}", expanded=opened):
            details_col, scores_col = st.columns([3, 1])
            sentiment = article['sentiment']

            with details_col:
                st.write(f"**Source:** {article['source']}")
                if 'date' in article:
                    st.write(f"**Date:** {article['date']}")
                st.write(f"**URL:** {article.get('url', 'N/A')}")

                # Overall sentiment: label and CSS class are picked together.
                compound = sentiment['compound']
                if compound > 0.1:
                    sentiment_label, sentiment_color = "Positive", "sentiment-positive"
                elif compound < -0.1:
                    sentiment_label, sentiment_color = "Negative", "sentiment-negative"
                else:
                    sentiment_label, sentiment_color = "Neutral", "sentiment-neutral"
                st.markdown(f"**Sentiment:** <span class='{sentiment_color}'>{sentiment_label} ({sentiment['compound']:.3f})</span>", unsafe_allow_html=True)

            with scores_col:
                # Per-model scores, shown only for models present in the dict.
                st.write("**Model Scores:**")
                for key, shown_name in model_labels:
                    if key in sentiment:
                        st.write(f"{shown_name}: {sentiment[key]:.3f}")

            # Primary summary
            if 'summary' in article:
                st.write("**Summary:**")
                st.write(article['summary'])

            # Summaries in the other languages; the English entry is skipped here.
            if 'summaries' in article:
                for lang, summary in article['summaries'].items():
                    if lang == 'English':
                        continue
                    st.write(f"**Summary ({lang}):**")
                    st.write(summary)
def _shorten_title(title, limit):
    """Return ``title`` cut to ``limit`` characters, adding "..." only if text was dropped."""
    return title[:limit] + "..." if len(title) > limit else title


def display_sentiment_analysis(results):
    """Display detailed sentiment analysis.

    Shows a per-article table of model scores, a correlation heatmap when more
    than one score column exists, and the five highest- and lowest-scoring
    articles by compound sentiment. Renders only the subheader when there are
    no articles.

    Args:
        results: Mapping with an ``'articles'`` list; each article is read via
            ``['title']`` and ``['sentiment']`` (``'compound'`` required,
            ``'vader'``, ``'loughran_mcdonald'``, ``'finbert'`` optional).
    """
    st.subheader("π― Detailed Sentiment Analysis")

    articles = results['articles']
    if not articles:
        return

    # Model comparison
    model_data = []
    for article in articles:
        sentiment = article['sentiment']
        # Titles used to get "..." appended unconditionally, which marked
        # short titles as truncated; now the suffix appears only after a cut.
        row = {'title': _shorten_title(article['title'], 30)}
        if 'vader' in sentiment:
            row['VADER'] = sentiment['vader']
        if 'loughran_mcdonald' in sentiment:
            row['Loughran-McDonald'] = sentiment['loughran_mcdonald']
        if 'finbert' in sentiment:
            row['FinBERT'] = sentiment['finbert']
        row['Final Score'] = sentiment['compound']
        model_data.append(row)

    df_models = pd.DataFrame(model_data)
    st.write("**Model Comparison:**")
    st.dataframe(df_models, use_container_width=True)

    # Correlation heatmap
    numeric_cols = [col for col in df_models.columns if col != 'title']
    if len(numeric_cols) > 1:
        corr_matrix = df_models[numeric_cols].corr()
        fig_heatmap = px.imshow(
            corr_matrix,
            text_auto=True,
            aspect="auto",
            color_continuous_scale="RdBu_r",
            color_continuous_midpoint=0
        )
        fig_heatmap.update_layout(title="Model Correlation Matrix")
        st.plotly_chart(fig_heatmap, use_container_width=True)

    # Top positive and negative articles
    col1, col2 = st.columns(2)

    with col1:
        st.write("**Most Positive Articles:**")
        positive_articles = sorted(
            articles,
            key=lambda x: x['sentiment']['compound'],
            reverse=True
        )[:5]
        for article in positive_articles:
            st.write(f"β’ {_shorten_title(article['title'], 50)} ({article['sentiment']['compound']:.3f})")

    with col2:
        st.write("**Most Negative Articles:**")
        negative_articles = sorted(
            articles,
            key=lambda x: x['sentiment']['compound']
        )[:5]
        for article in negative_articles:
            st.write(f"β’ {_shorten_title(article['title'], 50)} ({article['sentiment']['compound']:.3f})")
def display_audio_summaries(results):
    """Display audio summaries for different languages.

    Iterates ``results['audio_files']`` (language -> file path) and renders an
    audio player for every file that can be read. Shows an info message when
    the key is missing or holds no entries.

    Args:
        results: Mapping that may contain an ``'audio_files'`` mapping.
    """
    st.subheader("π΅ Audio Summaries")

    audio_files = results.get('audio_files')
    if not audio_files:
        # Covers a missing key as well as an empty or None value, which
        # previously rendered nothing (or failed on ``None.items()``).
        st.info("No audio summaries available. Enable audio generation in settings.")
        return

    for lang, audio_file in audio_files.items():
        st.write(f"**{lang} Summary:**")

        # Open directly instead of checking existence first: avoids the gap
        # between check and open, and also copes with a path that is None.
        try:
            with open(audio_file, 'rb') as audio_file_obj:
                audio_bytes = audio_file_obj.read()
        except (OSError, TypeError):
            st.write("Audio file not found")
            continue

        st.audio(audio_bytes, format='audio/mp3')
def display_export_options(results):
    """Offer the analysis results as CSV, JSON or PDF.

    Each of the three columns holds a trigger button. Pressing one prepares
    the payload for that format and shows the matching download button; the
    file name carries the current timestamp (``YYYYMMDD_HHMM``).
    """
    st.subheader("π€ Export Results")
    csv_col, json_col, pdf_col = st.columns(3)

    # CSV export
    with csv_col:
        csv_requested = st.button("π Download CSV", use_container_width=True)
        if csv_requested:
            csv_data = prepare_csv_export(results)
            csv_name = f"news_analysis_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
            st.download_button(
                label="Click to Download CSV",
                data=csv_data,
                file_name=csv_name,
                mime="text/csv",
            )

    # JSON export; values json cannot encode natively are passed through str().
    with json_col:
        json_requested = st.button("π Download JSON", use_container_width=True)
        if json_requested:
            json_data = json.dumps(results, indent=2, default=str)
            json_name = f"news_analysis_{datetime.now().strftime('%Y%m%d_%H%M')}.json"
            st.download_button(
                label="Click to Download JSON",
                data=json_data,
                file_name=json_name,
                mime="application/json",
            )

    # PDF report; generation errors are reported in the page.
    with pdf_col:
        pdf_requested = st.button("π Generate PDF Report", use_container_width=True)
        if pdf_requested:
            try:
                pdf_buffer = generate_pdf_report(results)
                pdf_name = f"news_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M')}.pdf"
                st.download_button(
                    label="Click to Download PDF",
                    data=pdf_buffer,
                    file_name=pdf_name,
                    mime="application/pdf",
                )
            except Exception as e:
                st.error(f"Error generating PDF: {str(e)}")
def display_api_info(results):
    """Display API information and examples.

    Shows the endpoint, its parameters, an example request built from the
    current query, and a sample response containing the first two articles.

    Args:
        results: Mapping read via ``'query'``, ``'articles'`` and
            ``['summary']['average_sentiment']``.
    """
    from urllib.parse import urlencode  # function-scope import; only needed here

    st.subheader("π API Access")
    st.write("**Endpoint:** `/api/analyze`")
    st.write("**Method:** GET")
    st.write("**Parameters:**")
    st.code("""
- query: string (required) - Company name, ticker, or keyword
- num_articles: integer (default: 20) - Number of articles to analyze
- languages: array (default: ["English"]) - Summary languages
- include_audio: boolean (default: true) - Generate audio summaries
- sentiment_models: array (default: ["VADER", "Loughran-McDonald", "FinBERT"]) - Models to use
""")

    st.write("**Example Request:**")
    # URL-encode the query so spaces, '&' and similar characters yield a
    # request that can actually be copied and sent.
    query_string = urlencode({'query': results['query'], 'num_articles': 20})
    st.code(f"GET /api/analyze?{query_string}")

    st.write("**Sample Response:**")
    sample_response = {
        "query": results['query'],
        "total_articles": len(results['articles']),
        "average_sentiment": results['summary']['average_sentiment'],
        "articles": results['articles'][:2]  # Show first 2 articles as example
    }
    st.json(sample_response)
def prepare_csv_export(results):
    """Prepare CSV data for export.

    Builds one row per article with title, source, URL, date, compound
    sentiment, a Positive/Negative/Neutral label (thresholds +/-0.1) and the
    summary, plus one column per model score that the article carries.

    Args:
        results: Mapping with an ``'articles'`` list; each article needs
            ``'title'``, ``'source'`` and ``['sentiment']['compound']``.

    Returns:
        The CSV text (no index column). With no articles the result is the
        header row of the base columns rather than a header-less blank file.

    Raises:
        KeyError: If a required key is missing.
    """
    base_columns = [
        'title',
        'source',
        'url',
        'date',
        'sentiment_compound',
        'sentiment_label',
        'summary',
    ]
    # (key in the sentiment dict, CSV column name)
    model_columns = (
        ('vader', 'vader_score'),
        ('loughran_mcdonald', 'loughran_mcdonald_score'),
        ('finbert', 'finbert_score'),
    )

    rows = []
    for article in results['articles']:
        sentiment = article['sentiment']
        compound = sentiment['compound']
        if compound > 0.1:
            label = 'Positive'
        elif compound < -0.1:
            label = 'Negative'
        else:
            label = 'Neutral'

        row = {
            'title': article['title'],
            'source': article['source'],
            'url': article.get('url', ''),
            'date': article.get('date', ''),
            'sentiment_compound': compound,
            'sentiment_label': label,
            'summary': article.get('summary', ''),
        }
        # Add model-specific scores only where present
        for key, column in model_columns:
            if key in sentiment:
                row[column] = sentiment[key]
        rows.append(row)

    # An empty row list would give a frame without columns, i.e. a CSV with
    # no header; fall back to the fixed base columns in that case.
    df = pd.DataFrame(rows) if rows else pd.DataFrame(columns=base_columns)
    return df.to_csv(index=False)
def show_demo_dashboard():
    """Show demo dashboard with sample data.

    Renders the welcome text (features, use cases, getting-started steps)
    followed by a sample sentiment pie chart and four hard-coded sample
    metrics. Uses no analysis results.
    """
    st.header("π Welcome to Global Business News Intelligence")
    st.markdown("""
### Key Features:
- **π Multi-Source News Scraping:** Aggregates news from reliable sources
- **π― Advanced Sentiment Analysis:** Uses VADER, Loughran-McDonald, and FinBERT models
- **π Multilingual Support:** Summaries in English, Hindi, and Tamil
- **π΅ Audio Generation:** Text-to-speech for all language summaries
- **π Interactive Dashboard:** Real-time charts and visualizations
- **π€ Multiple Export Formats:** CSV, JSON, and PDF reports
- **π API Access:** Programmatic access to all features
### Use Cases:
- **π Investment Research:** Track sentiment around stocks and companies
- **π’ Brand Monitoring:** Monitor public perception of your brand
- **π Market Intelligence:** Stay informed about industry trends
- **π° Media Analysis:** Analyze coverage patterns across sources
- **π Global Insights:** Access news in multiple languages
### Get Started:
1. Enter a company name, stock ticker, or keyword in the sidebar
2. Configure your analysis settings
3. Click "Analyze News" to start
4. Explore results in the interactive dashboard
5. Export your findings in multiple formats
""")

    # Sample visualization
    st.subheader("π Sample Analysis Dashboard")

    # Create sample data
    sample_data = {
        'Sentiment': ['Positive', 'Negative', 'Neutral'],
        'Count': [45, 15, 40]
    }
    fig = px.pie(
        values=sample_data['Count'],
        names=sample_data['Sentiment'],
        # color_discrete_map is keyed on the values passed as `color`;
        # without it the green/red/gray mapping was never applied.
        color=sample_data['Sentiment'],
        color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'},
        title="Sample Sentiment Distribution"
    )

    col1, col2 = st.columns([1, 1])
    with col1:
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        st.write("**Sample Metrics:**")
        st.metric("Articles Analyzed", "100")
        st.metric("Average Sentiment", "0.234")
        st.metric("Sources Covered", "15")
        st.metric("Languages", "3")
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == '__main__':
    main()