wekey1998's picture
Update app.py
926ab7a verified
raw
history blame
22.1 kB
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import asyncio
import json
import base64
from datetime import datetime
import io
import os
# Import our modules
from api import NewsAnalyzer
from utils import load_config, cache_results
from report import generate_pdf_report
# Configure page: title, icon, wide layout, and an initially expanded sidebar.
_PAGE_SETTINGS = {
    "page_title": "Global Business News Intelligence Dashboard",
    "page_icon": "πŸ“Š",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS: defines the header, metric-card, sentiment-* and audio-container
# classes. It is rendered as raw HTML, hence unsafe_allow_html=True.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
text-align: center;
color: #2E86AB;
margin-bottom: 2rem;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 10px;
border-left: 4px solid #2E86AB;
}
.sentiment-positive { color: #28a745; font-weight: bold; }
.sentiment-negative { color: #dc3545; font-weight: bold; }
.sentiment-neutral { color: #6c757d; font-weight: bold; }
.audio-container {
background-color: #f8f9fa;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Initialize session state.
# The analyzer is handled on its own so that NewsAnalyzer() is only constructed
# when the key is actually missing.
if 'analyzer' not in st.session_state:
    st.session_state.analyzer = NewsAnalyzer()

# Every other key, paired with the value it starts with when absent.
_SESSION_DEFAULTS = (
    ('results', None),
    ('analysis_complete', False),
    ('query', ''),
    ('progress', 0),
    ('progress_bar', None),
    ('status_text', None),
)
for _key, _initial in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _initial
def main():
    """Render the page: header, sidebar controls, analysis run, and results.

    Reads the analysis settings from the sidebar widgets. When the analyze
    button is pressed with a non-empty query, it calls
    ``st.session_state.analyzer.analyze_news`` and stores the outcome in
    ``st.session_state.results``. Afterwards it shows either the stored
    results, a hint to start the analysis, or the demo dashboard.
    """
    # Header
    st.markdown('<h1 class="main-header">🌐 Global Business News Intelligence Dashboard</h1>', unsafe_allow_html=True)
    st.markdown("**Real-time sentiment analysis, multilingual summaries, and audio insights for business intelligence**")

    # Sidebar
    with st.sidebar:
        st.header("βš™οΈ Configuration")

        # Input section
        st.subheader("🎯 Target Analysis")
        query_type = st.selectbox("Query Type", ["Company", "Stock Ticker", "Keyword", "Industry"])
        query = st.text_input(f"Enter {query_type}:", placeholder="e.g., Tesla, TSLA, AI technology")

        st.subheader("πŸ“Š Analysis Settings")
        num_articles = st.slider("Number of Articles", 5, 50, 20)
        languages = st.multiselect(
            "Summary Languages",
            ["English", "Hindi", "Tamil"],
            default=["English"]
        )
        include_audio = st.checkbox("Generate Audio Summaries", True)

        st.subheader("πŸ”§ Model Settings")
        sentiment_models = st.multiselect(
            "Sentiment Models",
            ["VADER", "Loughran-McDonald", "FinBERT"],
            default=["VADER", "Loughran-McDonald", "FinBERT"]
        )

        # Analysis button
        analyze_button = st.button("πŸš€ Analyze News", type="primary", use_container_width=True)

    # Main content area
    if analyze_button and query:
        st.session_state.analysis_complete = False
        with st.spinner("πŸ” Analyzing news articles... This may take a few minutes."):
            try:
                # Create both UI elements first. The status placeholder used to
                # be stored in session state before it was assigned, which
                # raised UnboundLocalError and aborted every analysis run.
                progress_bar = st.progress(0)
                status_text = st.empty()

                # Store UI handles in session state for the progress callback
                st.session_state.progress_bar = progress_bar
                st.session_state.status_text = status_text

                # Settings handed to the analyzer
                config = {
                    'query': query,
                    'num_articles': num_articles,
                    'languages': languages,
                    'include_audio': include_audio,
                    'sentiment_models': sentiment_models
                }

                # Update progress
                status_text.text("πŸ” Scraping articles...")
                progress_bar.progress(20)

                results = st.session_state.analyzer.analyze_news(config, progress_callback=update_progress)
                st.session_state.results = results
                st.session_state.analysis_complete = True

                progress_bar.progress(100)
                status_text.text("βœ… Analysis complete!")
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing the page.
                st.error(f"Error during analysis: {e}")
                st.session_state.analysis_complete = False

    # Display results
    if st.session_state.analysis_complete and st.session_state.results:
        display_results(st.session_state.results)
    elif not st.session_state.analysis_complete and query:
        st.info("πŸ‘† Click 'Analyze News' to start the analysis")
    else:
        show_demo_dashboard()
def update_progress(progress, status):
    """Callback function for progress updates.

    Records ``progress`` in session state and, when the UI handles stored in
    ``st.session_state.progress_bar`` / ``st.session_state.status_text`` are
    set, pushes the value (clamped to 0-100) and the ``status`` text to them.

    The update is best-effort: a failure here never interrupts the caller, but
    it is logged at debug level instead of being silently discarded.
    """
    import logging

    try:
        st.session_state.progress = progress

        bar = st.session_state.progress_bar
        if bar is not None:
            # Clamp so an out-of-range value cannot be passed to the progress bar.
            bar.progress(int(max(0, min(100, progress))))

        status_slot = st.session_state.status_text
        if status_slot is not None:
            status_slot.text(status)
    except Exception:
        logging.getLogger(__name__).debug("Progress update failed", exc_info=True)
def display_results(results):
    """Display analysis results with interactive dashboard.

    Shows four headline metrics (article count, average sentiment, number of
    distinct sources, number of languages) followed by one tab per detail
    view.

    Args:
        results: Mapping read via ``results['query']``, ``results['articles']``
            (each article providing ``'source'``),
            ``results['summary']['average_sentiment']`` and the optional
            ``'languages'`` entry.
    """
    def _metric_card(column, label, value):
        # Render one metric between the metric-card <div> markers.
        with column:
            st.markdown('<div class="metric-card">', unsafe_allow_html=True)
            st.metric(label, value)
            st.markdown('</div>', unsafe_allow_html=True)

    st.header(f"πŸ“ˆ Analysis Results for: {results['query']}")

    # Key metrics
    col1, col2, col3, col4 = st.columns(4)
    _metric_card(col1, "Articles Analyzed", len(results['articles']))

    avg_sentiment = results['summary']['average_sentiment']
    _metric_card(col2, "Average Sentiment", f"{avg_sentiment:.3f}")

    distinct_sources = {article['source'] for article in results['articles']}
    _metric_card(col3, "Sources", len(distinct_sources))

    _metric_card(col4, "Languages", len(results.get('languages', ['English'])))

    # Tabs for different views, each paired with the function that fills it
    tab_labels = ["πŸ“Š Dashboard", "πŸ“° Articles", "🎯 Sentiment", "πŸ—£οΈ Audio", "πŸ“€ Export", "πŸ”Œ API"]
    tab_renderers = (
        display_dashboard,
        display_articles,
        display_sentiment_analysis,
        display_audio_summaries,
        display_export_options,
        display_api_info,
    )
    for tab, render in zip(st.tabs(tab_labels), tab_renderers):
        with tab:
            render(results)
def display_dashboard(results):
    """Display main dashboard with charts.

    Renders a sentiment-distribution pie chart, a per-source bar chart, a
    sentiment-over-time scatter plot (only for articles that carry a
    ``'date'``), and a keyword word cloud with the top ten keywords.

    Args:
        results: Mapping with ``'articles'`` (each providing ``'source'``,
            ``'title'``, ``'sentiment']['compound'`` and optionally ``'date'``)
            and an optional ``'keywords'`` list of dicts with ``'keyword'``
            and ``'score'``.
    """
    articles = results['articles']

    col1, col2 = st.columns(2)
    with col1:
        # Sentiment distribution
        st.subheader("πŸ“Š Sentiment Distribution")
        sentiment_counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0}
        for article in articles:
            compound = article['sentiment']['compound']
            if compound > 0.1:
                sentiment_counts['Positive'] += 1
            elif compound < -0.1:
                sentiment_counts['Negative'] += 1
            elif -0.1 <= compound <= 0.1:
                sentiment_counts['Neutral'] += 1

        sentiment_labels = list(sentiment_counts.keys())
        fig_pie = px.pie(
            values=list(sentiment_counts.values()),
            names=sentiment_labels,
            # color_discrete_map is keyed on the values passed as `color`;
            # without it the map below has nothing to apply to.
            color=sentiment_labels,
            color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'}
        )
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        # Source distribution
        st.subheader("πŸ“° Source Distribution")
        source_counts = {}
        for article in articles:
            source = article['source']
            source_counts[source] = source_counts.get(source, 0) + 1

        fig_bar = px.bar(
            x=list(source_counts.keys()),
            y=list(source_counts.values()),
            color=list(source_counts.values()),
            color_continuous_scale="viridis"
        )
        fig_bar.update_layout(xaxis_title="Source", yaxis_title="Article Count")
        st.plotly_chart(fig_bar, use_container_width=True)

    # Timeline chart
    st.subheader("πŸ“ˆ Sentiment Over Time")
    if articles:
        # Only dated articles are plotted, so the date is read directly.
        df_timeline = pd.DataFrame([
            {
                'date': article['date'],
                'sentiment': article['sentiment']['compound'],
                'title': article['title'][:50] + "..." if len(article['title']) > 50 else article['title']
            }
            for article in articles
            if 'date' in article
        ])
        if not df_timeline.empty:
            fig_timeline = px.scatter(
                df_timeline,
                x='date',
                y='sentiment',
                hover_data=['title'],
                color='sentiment',
                color_continuous_scale=['red', 'gray', 'green'],
                color_continuous_midpoint=0
            )
            fig_timeline.update_layout(
                xaxis_title="Date",
                yaxis_title="Sentiment Score",
                yaxis=dict(range=[-1, 1])
            )
            st.plotly_chart(fig_timeline, use_container_width=True)

    # Keywords word cloud
    st.subheader("πŸ”€ Key Topics")
    if 'keywords' in results and results['keywords']:
        col1, col2 = st.columns([2, 1])
        with col1:
            # Create word cloud
            keywords_text = ' '.join([kw['keyword'] for kw in results['keywords'][:50]])
            if keywords_text:
                wordcloud = WordCloud(
                    width=800,
                    height=400,
                    background_color='white',
                    colormap='viridis'
                ).generate(keywords_text)
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis('off')
                st.pyplot(fig)
                # pyplot keeps every figure alive until it is closed; close it
                # so figures do not pile up as the script is re-run.
                plt.close(fig)
        with col2:
            st.write("**Top Keywords:**")
            for rank, kw in enumerate(results['keywords'][:10], start=1):
                st.write(f"{rank}. {kw['keyword']} ({kw['score']:.3f})")
def display_articles(results):
    """Render one expander per article with metadata, scores, and summaries.

    The first three expanders start open. Each shows source, optional date,
    URL, the overall sentiment label with its compound score, whichever
    per-model scores are present, the summary, and any non-English summaries.
    """
    css_class_by_label = {
        "Positive": "sentiment-positive",
        "Negative": "sentiment-negative",
        "Neutral": "sentiment-neutral",
    }
    # (key inside the sentiment dict, label shown to the user)
    model_labels = (
        ("vader", "VADER"),
        ("loughran_mcdonald", "L&M"),
        ("finbert", "FinBERT"),
    )

    st.subheader(f"πŸ“° Articles ({len(results['articles'])})")

    for index, article in enumerate(results['articles']):
        with st.expander(f"πŸ“„ {article['title']}", expanded=(index < 3)):
            details_col, scores_col = st.columns([3, 1])

            with details_col:
                st.write(f"**Source:** {article['source']}")
                if 'date' in article:
                    st.write(f"**Date:** {article['date']}")
                st.write(f"**URL:** {article.get('url', 'N/A')}")

                # Sentiment
                sentiment = article['sentiment']
                compound = sentiment['compound']
                if compound > 0.1:
                    sentiment_label = "Positive"
                elif compound < -0.1:
                    sentiment_label = "Negative"
                else:
                    sentiment_label = "Neutral"
                sentiment_color = css_class_by_label[sentiment_label]
                st.markdown(
                    f"**Sentiment:** <span class='{sentiment_color}'>{sentiment_label} ({compound:.3f})</span>",
                    unsafe_allow_html=True,
                )

            with scores_col:
                # Model-specific scores
                st.write("**Model Scores:**")
                for key, shown_as in model_labels:
                    if key in sentiment:
                        st.write(f"{shown_as}: {sentiment[key]:.3f}")

            # Summary
            if 'summary' in article:
                st.write("**Summary:**")
                st.write(article['summary'])

            # Multilingual summaries (English is skipped here)
            if 'summaries' in article:
                for lang, text in article['summaries'].items():
                    if lang == 'English':
                        continue
                    st.write(f"**Summary ({lang}):**")
                    st.write(text)
def display_sentiment_analysis(results):
    """Display detailed sentiment analysis.

    Shows a per-article table of model scores, a correlation heatmap of those
    scores, and the five most positive and five most negative articles by
    compound score.

    Args:
        results: Mapping with ``'articles'``; each article provides
            ``'title'`` and a ``'sentiment'`` dict with ``'compound'`` and
            optionally ``'vader'``, ``'loughran_mcdonald'``, ``'finbert'``.
    """
    def _shorten(title, limit):
        # Add the ellipsis only when something was actually cut off, matching
        # the truncation used for the timeline chart titles.
        return title[:limit] + "..." if len(title) > limit else title

    # (key inside the sentiment dict, column name in the comparison table)
    model_columns = (
        ('vader', 'VADER'),
        ('loughran_mcdonald', 'Loughran-McDonald'),
        ('finbert', 'FinBERT'),
    )

    st.subheader("🎯 Detailed Sentiment Analysis")

    # Model comparison
    if results['articles']:
        model_data = []
        for article in results['articles']:
            sentiment = article['sentiment']
            row = {'title': _shorten(article['title'], 30)}
            for key, column in model_columns:
                if key in sentiment:
                    row[column] = sentiment[key]
            row['Final Score'] = sentiment['compound']
            model_data.append(row)

        df_models = pd.DataFrame(model_data)
        st.write("**Model Comparison:**")
        st.dataframe(df_models, use_container_width=True)

        # Correlation heatmap. A correlation needs at least two score columns
        # and at least two articles; with a single row every entry is NaN.
        numeric_cols = [col for col in df_models.columns if col != 'title']
        if len(numeric_cols) > 1 and len(df_models) > 1:
            corr_matrix = df_models[numeric_cols].corr()
            fig_heatmap = px.imshow(
                corr_matrix,
                text_auto=True,
                aspect="auto",
                color_continuous_scale="RdBu_r",
                color_continuous_midpoint=0
            )
            fig_heatmap.update_layout(title="Model Correlation Matrix")
            st.plotly_chart(fig_heatmap, use_container_width=True)

    # Top positive and negative articles
    col1, col2 = st.columns(2)
    with col1:
        st.write("**Most Positive Articles:**")
        positive_articles = sorted(
            results['articles'],
            key=lambda x: x['sentiment']['compound'],
            reverse=True
        )[:5]
        for article in positive_articles:
            st.write(f"β€’ {_shorten(article['title'], 50)} ({article['sentiment']['compound']:.3f})")
    with col2:
        st.write("**Most Negative Articles:**")
        negative_articles = sorted(
            results['articles'],
            key=lambda x: x['sentiment']['compound']
        )[:5]
        for article in negative_articles:
            st.write(f"β€’ {_shorten(article['title'], 50)} ({article['sentiment']['compound']:.3f})")
def display_audio_summaries(results):
    """Display audio summaries for different languages.

    For each ``language -> file path`` entry in ``results['audio_files']`` the
    file is read and handed to an audio player. A missing, empty, or
    unreadable path shows "Audio file not found" instead. When there are no
    audio entries at all, an informational hint is shown.
    """
    st.subheader("🎡 Audio Summaries")

    audio_files = results.get('audio_files')
    if not audio_files:
        # Covers both a missing key and an empty mapping, so the tab is never
        # left blank under the heading.
        st.info("No audio summaries available. Enable audio generation in settings.")
        return

    for lang, audio_path in audio_files.items():
        st.write(f"**{lang} Summary:**")

        audio_bytes = None
        if audio_path:
            # Open directly rather than checking existence first: the file can
            # vanish between the check and the read, and a None path would
            # make os.path.exists raise.
            try:
                with open(audio_path, 'rb') as audio_file_obj:
                    audio_bytes = audio_file_obj.read()
            except OSError:
                audio_bytes = None

        if audio_bytes is None:
            st.write("Audio file not found")
        else:
            # Create audio player
            st.audio(audio_bytes, format='audio/mp3')
def display_export_options(results):
    """Offer CSV, JSON, and PDF exports of the analysis results.

    Each column holds a button; pressing it prepares the export and reveals
    the matching download button. PDF generation errors are reported inline.
    """
    def _stamped_name(stem, extension):
        # File name carrying the current time down to the minute.
        return f"{stem}_{datetime.now().strftime('%Y%m%d_%H%M')}.{extension}"

    st.subheader("πŸ“€ Export Results")
    csv_col, json_col, pdf_col = st.columns(3)

    # CSV Export
    with csv_col:
        if st.button("πŸ“Š Download CSV", use_container_width=True):
            csv_payload = prepare_csv_export(results)
            st.download_button(
                label="Click to Download CSV",
                data=csv_payload,
                file_name=_stamped_name("news_analysis", "csv"),
                mime="text/csv",
            )

    # JSON Export
    with json_col:
        if st.button("πŸ“‹ Download JSON", use_container_width=True):
            # default=str stringifies values json cannot serialise natively
            json_payload = json.dumps(results, indent=2, default=str)
            st.download_button(
                label="Click to Download JSON",
                data=json_payload,
                file_name=_stamped_name("news_analysis", "json"),
                mime="application/json",
            )

    # PDF Report
    with pdf_col:
        if st.button("πŸ“„ Generate PDF Report", use_container_width=True):
            try:
                pdf_payload = generate_pdf_report(results)
                st.download_button(
                    label="Click to Download PDF",
                    data=pdf_payload,
                    file_name=_stamped_name("news_analysis_report", "pdf"),
                    mime="application/pdf",
                )
            except Exception as e:
                st.error(f"Error generating PDF: {str(e)}")
def display_api_info(results):
    """Display API information and examples.

    Shows the endpoint, method, and parameter list, then an example request
    and a sample response built from ``results`` (query, article count,
    average sentiment, and the first two articles).
    """
    from urllib.parse import urlencode

    st.subheader("πŸ”Œ API Access")
    st.write("**Endpoint:** `/api/analyze`")
    st.write("**Method:** GET")
    st.write("**Parameters:**")
    st.code("""
- query: string (required) - Company name, ticker, or keyword
- num_articles: integer (default: 20) - Number of articles to analyze
- languages: array (default: ["English"]) - Summary languages
- include_audio: boolean (default: true) - Generate audio summaries
- sentiment_models: array (default: ["VADER", "Loughran-McDonald", "FinBERT"]) - Models to use
""")

    st.write("**Example Request:**")
    # URL-encode the query so values with spaces or reserved characters
    # (e.g. "AI technology", "AT&T") still yield a valid example URL.
    query_string = urlencode({'query': results['query'], 'num_articles': 20})
    st.code(f"GET /api/analyze?{query_string}")

    st.write("**Sample Response:**")
    sample_response = {
        "query": results['query'],
        "total_articles": len(results['articles']),
        "average_sentiment": results['summary']['average_sentiment'],
        "articles": results['articles'][:2]  # Show first 2 articles as example
    }
    st.json(sample_response)
def prepare_csv_export(results):
    """Prepare CSV data for export.

    Builds one row per article with title, source, url, date, compound
    sentiment, a Positive/Negative/Neutral label (thresholds at +/-0.1), and
    summary. Per-model score columns are added for whichever of ``'vader'``,
    ``'loughran_mcdonald'`` and ``'finbert'`` appear in an article's sentiment.

    Args:
        results: Mapping with an ``'articles'`` list; every article provides
            ``'title'``, ``'source'`` and ``'sentiment']['compound'``, while
            ``'url'``, ``'date'`` and ``'summary'`` default to ``''``.

    Returns:
        The CSV text without an index column. When there are no articles the
        output still contains the header row of the always-present columns,
        so the export stays a parseable CSV.
    """
    base_columns = [
        'title', 'source', 'url', 'date',
        'sentiment_compound', 'sentiment_label', 'summary',
    ]
    # (key inside the sentiment dict, CSV column name)
    score_columns = (
        ('vader', 'vader_score'),
        ('loughran_mcdonald', 'loughran_mcdonald_score'),
        ('finbert', 'finbert_score'),
    )

    rows = []
    for article in results['articles']:
        sentiment = article['sentiment']
        compound = sentiment['compound']
        if compound > 0.1:
            label = 'Positive'
        elif compound < -0.1:
            label = 'Negative'
        else:
            label = 'Neutral'

        row = {
            'title': article['title'],
            'source': article['source'],
            'url': article.get('url', ''),
            'date': article.get('date', ''),
            'sentiment_compound': compound,
            'sentiment_label': label,
            'summary': article.get('summary', ''),
        }
        # Add model-specific scores
        for key, column in score_columns:
            if key in sentiment:
                row[column] = sentiment[key]
        rows.append(row)

    if rows:
        df = pd.DataFrame(rows)
    else:
        # An empty row list would give a frame with no columns at all.
        df = pd.DataFrame(columns=base_columns)
    return df.to_csv(index=False)
def show_demo_dashboard():
    """Show demo dashboard with sample data.

    Renders the welcome text (features, use cases, getting-started steps) and
    a sample sentiment pie chart next to hard-coded sample metrics.
    """
    st.header("πŸš€ Welcome to Global Business News Intelligence")
    st.markdown("""
### Key Features:
- **πŸ” Multi-Source News Scraping:** Aggregates news from reliable sources
- **🎯 Advanced Sentiment Analysis:** Uses VADER, Loughran-McDonald, and FinBERT models
- **🌐 Multilingual Support:** Summaries in English, Hindi, and Tamil
- **🎡 Audio Generation:** Text-to-speech for all language summaries
- **πŸ“Š Interactive Dashboard:** Real-time charts and visualizations
- **πŸ“€ Multiple Export Formats:** CSV, JSON, and PDF reports
- **πŸ”Œ API Access:** Programmatic access to all features
### Use Cases:
- **πŸ“ˆ Investment Research:** Track sentiment around stocks and companies
- **🏒 Brand Monitoring:** Monitor public perception of your brand
- **πŸ” Market Intelligence:** Stay informed about industry trends
- **πŸ“° Media Analysis:** Analyze coverage patterns across sources
- **🌍 Global Insights:** Access news in multiple languages
### Get Started:
1. Enter a company name, stock ticker, or keyword in the sidebar
2. Configure your analysis settings
3. Click "Analyze News" to start
4. Explore results in the interactive dashboard
5. Export your findings in multiple formats
""")

    # Sample visualization
    st.subheader("πŸ“Š Sample Analysis Dashboard")

    # Create sample data
    sample_data = {
        'Sentiment': ['Positive', 'Negative', 'Neutral'],
        'Count': [45, 15, 40]
    }
    fig = px.pie(
        values=sample_data['Count'],
        names=sample_data['Sentiment'],
        # color_discrete_map is keyed on the values passed as `color`;
        # without it the map below has nothing to apply to.
        color=sample_data['Sentiment'],
        color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'},
        title="Sample Sentiment Distribution"
    )

    col1, col2 = st.columns([1, 1])
    with col1:
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        st.write("**Sample Metrics:**")
        st.metric("Articles Analyzed", "100")
        st.metric("Average Sentiment", "0.234")
        st.metric("Sources Covered", "15")
        st.metric("Languages", "3")
# Script entry point: render the page only when this file is run directly.
if __name__ == "__main__":
    main()