import asyncio
import base64
import io
import json
import logging
import os
from datetime import datetime
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from wordcloud import WordCloud

from api import NewsAnalyzer
from utils import load_config, cache_results
from report import generate_pdf_report


# Browser-tab title/icon and overall page layout for the dashboard.
_PAGE_SETTINGS = {
    "page_title": "Global Business News Intelligence Dashboard",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)


# Stylesheet injected once per run; the class names are referenced by the
# HTML snippets emitted from the display_* functions below.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        text-align: center;
        color: #2E86AB;
        margin-bottom: 2rem;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 10px;
        border-left: 4px solid #2E86AB;
    }
    .sentiment-positive { color: #28a745; font-weight: bold; }
    .sentiment-negative { color: #dc3545; font-weight: bold; }
    .sentiment-neutral { color: #6c757d; font-weight: bold; }
    .audio-container {
        background-color: #f8f9fa;
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)


# Seed per-session state on the first run only; later reruns keep whatever
# values are already stored.
if 'analyzer' not in st.session_state:
    # Built lazily so the analyzer is constructed once per session.
    st.session_state.analyzer = NewsAnalyzer()

for _state_key, _initial_value in (
    ('results', None),
    ('analysis_complete', False),
    ('query', ''),
    ('progress', 0),
    ('progress_bar', None),
    ('status_text', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value


def main():
    """Render the page: header, sidebar controls, analysis run and results.

    The sidebar collects the query and analysis options. When the analyze
    button is pressed with a non-empty query, the session's analyzer is run
    with those options and its result is stored in ``st.session_state`` so
    it survives reruns. The main area then shows either the stored results,
    a hint to start the analysis, or the demo dashboard.
    """
    st.markdown(
        '<h1 class="main-header">π Global Business News Intelligence Dashboard</h1>',
        unsafe_allow_html=True,
    )
    st.markdown("**Real-time sentiment analysis, multilingual summaries, and audio insights for business intelligence**")

    with st.sidebar:
        st.header("βοΈ Configuration")

        st.subheader("π― Target Analysis")
        query_type = st.selectbox("Query Type", ["Company", "Stock Ticker", "Keyword", "Industry"])
        query = st.text_input(f"Enter {query_type}:", placeholder="e.g., Tesla, TSLA, AI technology")

        st.subheader("π Analysis Settings")
        num_articles = st.slider("Number of Articles", 5, 50, 20)
        languages = st.multiselect(
            "Summary Languages",
            ["English", "Hindi", "Tamil"],
            default=["English"],
        )
        include_audio = st.checkbox("Generate Audio Summaries", True)

        st.subheader("π§ Model Settings")
        sentiment_models = st.multiselect(
            "Sentiment Models",
            ["VADER", "Loughran-McDonald", "FinBERT"],
            default=["VADER", "Loughran-McDonald", "FinBERT"],
        )

        analyze_button = st.button("π Analyze News", type="primary", use_container_width=True)

    if analyze_button and query:
        st.session_state.analysis_complete = False
        with st.spinner("π Analyzing news articles... This may take a few minutes."):
            try:
                # Both placeholders must exist before they are published to
                # session state: update_progress() reads them from there.
                progress_bar = st.progress(0)
                status_text = st.empty()
                st.session_state.progress_bar = progress_bar
                st.session_state.status_text = status_text

                config = {
                    'query': query,
                    'num_articles': num_articles,
                    'languages': languages,
                    'include_audio': include_audio,
                    'sentiment_models': sentiment_models,
                }

                status_text.text("π Scraping articles...")
                progress_bar.progress(20)

                results = st.session_state.analyzer.analyze_news(
                    config, progress_callback=update_progress
                )
                st.session_state.results = results
                st.session_state.analysis_complete = True

                progress_bar.progress(100)
                status_text.text("✅ Analysis complete!")
            except Exception as e:
                # Top-level UI boundary: report the failure instead of
                # crashing the page.
                st.error(f"Error during analysis: {str(e)}")
                st.session_state.analysis_complete = False
            finally:
                # The placeholders belong to this script run only; drop them
                # so a later callback cannot write to stale elements.
                st.session_state.progress_bar = None
                st.session_state.status_text = None
    elif analyze_button:
        st.warning("Please enter a company, ticker, keyword, or industry to analyze.")

    if st.session_state.analysis_complete and st.session_state.results:
        display_results(st.session_state.results)
    elif not st.session_state.analysis_complete and query:
        st.info("π Click 'Analyze News' to start the analysis")
    else:
        show_demo_dashboard()


def update_progress(progress, status):
    """Callback function for progress updates.

    Records ``progress`` in session state and, when the progress bar and
    status placeholder are present there, updates them.

    Args:
        progress: Completion value; clamped to the 0-100 range and truncated
            to an int before being sent to the progress bar.
        status: Text shown in the status placeholder.

    Returns:
        None. Failures are deliberately non-fatal so that a UI problem never
        aborts the caller; they are logged at debug level.
    """
    try:
        st.session_state.progress = progress

        bar = st.session_state.progress_bar
        if bar is not None:
            bar.progress(int(max(0, min(100, progress))))

        placeholder = st.session_state.status_text
        if placeholder is not None:
            placeholder.text(status)
    except Exception:
        # Best-effort by design, but leave a trace instead of hiding it.
        logging.getLogger(__name__).debug("Progress update failed", exc_info=True)


def display_results(results):
    """Display analysis results with interactive dashboard.

    Shows a header, four summary metrics (article count, average sentiment,
    distinct sources, language count) and one tab per detail view.

    Args:
        results: Mapping that provides ``'query'``, ``'articles'`` (each
            with a ``'source'``), ``'summary'['average_sentiment']`` and
            optionally ``'languages'``.
    """
    st.header(f"π Analysis Results for: {results['query']}")

    def _metric_card(label, value):
        # NOTE(review): the opening and closing <div> are emitted as separate
        # markdown elements, so they may not actually wrap the metric -
        # confirm the rendered output.
        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
        st.metric(label, value)
        st.markdown('</div>', unsafe_allow_html=True)

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        _metric_card("Articles Analyzed", len(results['articles']))

    with col2:
        avg_sentiment = results['summary']['average_sentiment']
        _metric_card("Average Sentiment", f"{avg_sentiment:.3f}")

    with col3:
        _metric_card("Sources", len({article['source'] for article in results['articles']}))

    with col4:
        _metric_card("Languages", len(results.get('languages', ['English'])))

    # Tab label paired with the function that renders its content.
    tab_views = [
        ("π Dashboard", display_dashboard),
        ("π° Articles", display_articles),
        ("π― Sentiment", display_sentiment_analysis),
        ("π£οΈ Audio", display_audio_summaries),
        ("π€ Export", display_export_options),
        ("π API", display_api_info),
    ]
    tabs = st.tabs([label for label, _ in tab_views])
    for tab, (_, render) in zip(tabs, tab_views):
        with tab:
            render(results)


def display_dashboard(results):
    """Display main dashboard with charts.

    Renders, in order: a sentiment pie chart and a per-source bar chart side
    by side, a sentiment-over-time scatter plot for articles that carry a
    date, and a keyword word cloud next to a top-10 keyword list.

    Args:
        results: Mapping with an ``'articles'`` list (each article providing
            ``'source'``, ``'title'``, ``'sentiment'['compound']`` and
            optionally ``'date'``) and an optional ``'keywords'`` list of
            mappings with ``'keyword'`` and ``'score'``.
    """
    articles = results['articles']
    pie_col, bar_col = st.columns(2)

    with pie_col:
        st.subheader("π Sentiment Distribution")
        # Scores within +/-0.1 of zero are counted as neutral.
        sentiment_counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0}
        for article in articles:
            compound = article['sentiment']['compound']
            if compound > 0.1:
                sentiment_counts['Positive'] += 1
            elif compound < -0.1:
                sentiment_counts['Negative'] += 1
            elif -0.1 <= compound <= 0.1:
                sentiment_counts['Neutral'] += 1

        labels = list(sentiment_counts)
        fig_pie = px.pie(
            values=list(sentiment_counts.values()),
            names=labels,
            # color_discrete_map is only applied when `color` is given.
            color=labels,
            color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'},
        )
        st.plotly_chart(fig_pie, use_container_width=True)

    with bar_col:
        st.subheader("π° Source Distribution")
        source_counts = {}
        for article in articles:
            source = article['source']
            source_counts[source] = source_counts.get(source, 0) + 1

        fig_bar = px.bar(
            x=list(source_counts.keys()),
            y=list(source_counts.values()),
            color=list(source_counts.values()),
            color_continuous_scale="viridis",
        )
        fig_bar.update_layout(xaxis_title="Source", yaxis_title="Article Count")
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("π Sentiment Over Time")
    if articles:
        # Only dated articles can be placed on the time axis.
        timeline_rows = []
        for article in articles:
            if 'date' not in article:
                continue
            title = article['title']
            timeline_rows.append({
                'date': article['date'],
                'sentiment': article['sentiment']['compound'],
                'title': title[:50] + "..." if len(title) > 50 else title,
            })
        df_timeline = pd.DataFrame(timeline_rows)

        if not df_timeline.empty:
            fig_timeline = px.scatter(
                df_timeline,
                x='date',
                y='sentiment',
                hover_data=['title'],
                color='sentiment',
                color_continuous_scale=['red', 'gray', 'green'],
                color_continuous_midpoint=0,
            )
            fig_timeline.update_layout(
                xaxis_title="Date",
                yaxis_title="Sentiment Score",
                yaxis=dict(range=[-1, 1]),
            )
            st.plotly_chart(fig_timeline, use_container_width=True)

    st.subheader("π€ Key Topics")
    keywords = results.get('keywords')
    if keywords:
        cloud_col, list_col = st.columns([2, 1])

        with cloud_col:
            keywords_text = ' '.join([kw['keyword'] for kw in keywords[:50]])
            if keywords_text:
                wordcloud = WordCloud(
                    width=800,
                    height=400,
                    background_color='white',
                    colormap='viridis',
                ).generate(keywords_text)

                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis('off')
                st.pyplot(fig)
                # Release the figure; otherwise one stays open per rerun.
                plt.close(fig)

        with list_col:
            st.write("**Top Keywords:**")
            for rank, kw in enumerate(keywords[:10], start=1):
                st.write(f"{rank}. {kw['keyword']} ({kw['score']:.3f})")


def display_articles(results):
    """Render one expander per article with metadata, scores and summaries.

    The first three expanders start opened. Each shows the source, optional
    date, URL and colour-coded overall sentiment on the left, the per-model
    scores that are present on the right, then the summary and any
    non-English summaries.
    """
    article_list = results['articles']
    st.subheader(f"π° Articles ({len(article_list)})")

    # Sentiment dict key -> prefix used when printing that model's score.
    model_prefixes = (
        ('vader', 'VADER'),
        ('loughran_mcdonald', 'L&M'),
        ('finbert', 'FinBERT'),
    )

    for position, article in enumerate(article_list):
        with st.expander(f"π {article['title']}", expanded=(position < 3)):
            info_col, score_col = st.columns([3, 1])

            with info_col:
                st.write(f"**Source:** {article['source']}")
                if 'date' in article:
                    st.write(f"**Date:** {article['date']}")
                st.write(f"**URL:** {article.get('url', 'N/A')}")

                sentiment = article['sentiment']
                if sentiment['compound'] > 0.1:
                    sentiment_label, sentiment_color = "Positive", "sentiment-positive"
                elif sentiment['compound'] < -0.1:
                    sentiment_label, sentiment_color = "Negative", "sentiment-negative"
                else:
                    sentiment_label, sentiment_color = "Neutral", "sentiment-neutral"
                st.markdown(
                    f"**Sentiment:** <span class='{sentiment_color}'>{sentiment_label} ({sentiment['compound']:.3f})</span>",
                    unsafe_allow_html=True,
                )

            with score_col:
                st.write("**Model Scores:**")
                for model_key, prefix in model_prefixes:
                    if model_key in sentiment:
                        st.write(f"{prefix}: {sentiment[model_key]:.3f}")

            if 'summary' in article:
                st.write("**Summary:**")
                st.write(article['summary'])

            if 'summaries' in article:
                for lang, text in article['summaries'].items():
                    if lang == 'English':
                        continue
                    st.write(f"**Summary ({lang}):**")
                    st.write(text)


def display_sentiment_analysis(results):
    """Display detailed sentiment analysis.

    Shows a per-article table comparing the available model scores with the
    final compound score, a correlation heatmap when more than one score
    column exists, and the five highest- and lowest-scoring articles.

    Args:
        results: Mapping with an ``'articles'`` list; each article provides
            ``'title'`` and a ``'sentiment'`` mapping with ``'compound'``
            and optionally ``'vader'``, ``'loughran_mcdonald'``,
            ``'finbert'``.
    """
    def _shorten(title, limit):
        # Add the ellipsis only when something was actually cut off.
        return title[:limit] + "..." if len(title) > limit else title

    st.subheader("π― Detailed Sentiment Analysis")
    articles = results['articles']

    if articles:
        # Sentiment dict key -> column heading in the comparison table.
        model_columns = (
            ('vader', 'VADER'),
            ('loughran_mcdonald', 'Loughran-McDonald'),
            ('finbert', 'FinBERT'),
        )
        model_data = []
        for article in articles:
            sentiment = article['sentiment']
            row = {'title': _shorten(article['title'], 30)}
            for key, column in model_columns:
                if key in sentiment:
                    row[column] = sentiment[key]
            row['Final Score'] = sentiment['compound']
            model_data.append(row)

        df_models = pd.DataFrame(model_data)
        st.write("**Model Comparison:**")
        st.dataframe(df_models, use_container_width=True)

        numeric_cols = [col for col in df_models.columns if col != 'title']
        if len(numeric_cols) > 1:
            corr_matrix = df_models[numeric_cols].corr()
            fig_heatmap = px.imshow(
                corr_matrix,
                text_auto=True,
                aspect="auto",
                color_continuous_scale="RdBu_r",
                color_continuous_midpoint=0,
            )
            fig_heatmap.update_layout(title="Model Correlation Matrix")
            st.plotly_chart(fig_heatmap, use_container_width=True)

    def _compound(article):
        return article['sentiment']['compound']

    col1, col2 = st.columns(2)

    with col1:
        st.write("**Most Positive Articles:**")
        for article in sorted(articles, key=_compound, reverse=True)[:5]:
            st.write(f"β’ {_shorten(article['title'], 50)} ({_compound(article):.3f})")

    with col2:
        st.write("**Most Negative Articles:**")
        for article in sorted(articles, key=_compound)[:5]:
            st.write(f"β’ {_shorten(article['title'], 50)} ({_compound(article):.3f})")


def display_audio_summaries(results):
    """Display audio summaries for different languages.

    Args:
        results: Mapping whose optional ``'audio_files'`` entry maps a
            language name to the path of an audio file.

    A missing, ``None`` or empty ``'audio_files'`` entry shows an info
    message. A path that is empty or cannot be read shows
    "Audio file not found" for that language instead of raising.
    """
    st.subheader("π΅ Audio Summaries")

    audio_files = results.get('audio_files')
    if not audio_files:
        st.info("No audio summaries available. Enable audio generation in settings.")
        return

    for lang, audio_path in audio_files.items():
        st.write(f"**{lang} Summary:**")

        audio_bytes = None
        if audio_path:
            # Open directly instead of checking existence first: the file
            # could vanish between the check and the read.
            try:
                with open(audio_path, 'rb') as audio_stream:
                    audio_bytes = audio_stream.read()
            except OSError:
                audio_bytes = None

        if audio_bytes is None:
            st.write("Audio file not found")
        else:
            st.audio(audio_bytes, format='audio/mp3')


def display_export_options(results):
    """Offer the results as CSV, JSON or PDF downloads, one column each.

    Each column has a trigger button; once pressed, the export is built and
    a download button for it is rendered. PDF generation errors are shown
    inline rather than raised.
    """
    st.subheader("π€ Export Results")

    def _stamped_name(stem, extension):
        # Minute-resolution timestamp taken at the moment of the call.
        return f"{stem}_{datetime.now().strftime('%Y%m%d_%H%M')}.{extension}"

    csv_col, json_col, pdf_col = st.columns(3)

    with csv_col:
        csv_requested = st.button("π Download CSV", use_container_width=True)
        if csv_requested:
            st.download_button(
                label="Click to Download CSV",
                data=prepare_csv_export(results),
                file_name=_stamped_name("news_analysis", "csv"),
                mime="text/csv",
            )

    with json_col:
        json_requested = st.button("π Download JSON", use_container_width=True)
        if json_requested:
            st.download_button(
                label="Click to Download JSON",
                # default=str stringifies values json cannot encode natively.
                data=json.dumps(results, indent=2, default=str),
                file_name=_stamped_name("news_analysis", "json"),
                mime="application/json",
            )

    with pdf_col:
        pdf_requested = st.button("π Generate PDF Report", use_container_width=True)
        if pdf_requested:
            try:
                st.download_button(
                    label="Click to Download PDF",
                    data=generate_pdf_report(results),
                    file_name=_stamped_name("news_analysis_report", "pdf"),
                    mime="application/pdf",
                )
            except Exception as e:
                st.error(f"Error generating PDF: {str(e)}")


def display_api_info(results):
    """Display API information and examples.

    Shows the endpoint, its parameters, an example request built from the
    current query, and a sample response assembled from ``results``.

    Args:
        results: Mapping that provides ``'query'``, ``'articles'`` and
            ``'summary'['average_sentiment']``.
    """
    st.subheader("π API Access")

    st.write("**Endpoint:** `/api/analyze`")
    st.write("**Method:** GET")
    st.write("**Parameters:**")
    st.code("""
    - query: string (required) - Company name, ticker, or keyword
    - num_articles: integer (default: 20) - Number of articles to analyze
    - languages: array (default: ["English"]) - Summary languages
    - include_audio: boolean (default: true) - Generate audio summaries
    - sentiment_models: array (default: ["VADER", "Loughran-McDonald", "FinBERT"]) - Models to use
    """)

    st.write("**Example Request:**")
    # Percent-encode the query so spaces, '&' or '#' in it do not produce an
    # invalid or misleading example URL.
    encoded_query = quote(str(results['query']), safe='')
    st.code(f"GET /api/analyze?query={encoded_query}&num_articles=20")

    st.write("**Sample Response:**")
    sample_response = {
        "query": results['query'],
        "total_articles": len(results['articles']),
        "average_sentiment": results['summary']['average_sentiment'],
        # Two articles are enough to illustrate the response shape.
        "articles": results['articles'][:2],
    }
    st.json(sample_response)


def prepare_csv_export(results):
    """Flatten the analysed articles into CSV text.

    One row per article with title, source, url, date, compound sentiment,
    its Positive/Negative/Neutral label and the summary. A per-model score
    column is added for each model score present on an article.

    Returns:
        The CSV document as a string, without an index column.
    """
    # Sentiment dict key -> CSV column name for the optional model scores.
    optional_columns = (
        ('vader', 'vader_score'),
        ('loughran_mcdonald', 'loughran_mcdonald_score'),
        ('finbert', 'finbert_score'),
    )

    records = []
    for item in results['articles']:
        record = {
            'title': item['title'],
            'source': item['source'],
            'url': item.get('url', ''),
            'date': item.get('date', ''),
        }

        scores = item['sentiment']
        compound = scores['compound']
        if compound > 0.1:
            label = 'Positive'
        elif compound < -0.1:
            label = 'Negative'
        else:
            label = 'Neutral'

        record['sentiment_compound'] = compound
        record['sentiment_label'] = label
        record['summary'] = item.get('summary', '')

        for score_key, column in optional_columns:
            if score_key in scores:
                record[column] = scores[score_key]

        records.append(record)

    frame = pd.DataFrame(records)
    return frame.to_csv(index=False)


def show_demo_dashboard():
    """Show demo dashboard with sample data.

    Renders the welcome text (features, use cases, getting-started steps)
    followed by a sample sentiment pie chart and hard-coded sample metrics.
    All figures shown here are static placeholders.
    """
    st.header("π Welcome to Global Business News Intelligence")

    st.markdown("""
    ### Key Features:
    - **π Multi-Source News Scraping:** Aggregates news from reliable sources
    - **π― Advanced Sentiment Analysis:** Uses VADER, Loughran-McDonald, and FinBERT models
    - **π Multilingual Support:** Summaries in English, Hindi, and Tamil
    - **π΅ Audio Generation:** Text-to-speech for all language summaries
    - **π Interactive Dashboard:** Real-time charts and visualizations
    - **π€ Multiple Export Formats:** CSV, JSON, and PDF reports
    - **π API Access:** Programmatic access to all features

    ### Use Cases:
    - **π Investment Research:** Track sentiment around stocks and companies
    - **π’ Brand Monitoring:** Monitor public perception of your brand
    - **π Market Intelligence:** Stay informed about industry trends
    - **π° Media Analysis:** Analyze coverage patterns across sources
    - **π Global Insights:** Access news in multiple languages

    ### Get Started:
    1. Enter a company name, stock ticker, or keyword in the sidebar
    2. Configure your analysis settings
    3. Click "Analyze News" to start
    4. Explore results in the interactive dashboard
    5. Export your findings in multiple formats
    """)

    st.subheader("π Sample Analysis Dashboard")

    sample_data = {
        'Sentiment': ['Positive', 'Negative', 'Neutral'],
        'Count': [45, 15, 40],
    }

    fig = px.pie(
        values=sample_data['Count'],
        names=sample_data['Sentiment'],
        # color_discrete_map is only applied when `color` is given.
        color=sample_data['Sentiment'],
        color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'},
        title="Sample Sentiment Distribution",
    )

    chart_col, metrics_col = st.columns([1, 1])
    with chart_col:
        st.plotly_chart(fig, use_container_width=True)

    with metrics_col:
        st.write("**Sample Metrics:**")
        st.metric("Articles Analyzed", "100")
        st.metric("Average Sentiment", "0.234")
        st.metric("Sources Covered", "15")
        st.metric("Languages", "3")


# Run the dashboard only when this file is the entry script.
if __name__ == "__main__":
    main()