Spaces:

wekey1998
/

news-sentiment-project

Running

App Files Files Community

news-sentiment-project / app.py

wekey1998

Update app.py

926ab7a verified 18 days ago

raw

history blame

22.1 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from wordcloud import WordCloud
	import matplotlib.pyplot as plt
	import asyncio
	import json
	import base64
	from datetime import datetime
	import io
	import os

	# Import our modules
	from api import NewsAnalyzer
	from utils import load_config, cache_results
	from report import generate_pdf_report

	# Page configuration: browser-tab title and icon, wide layout, sidebar open on load.
	_PAGE_SETTINGS = {
	    "page_title": "Global Business News Intelligence Dashboard",
	    "page_icon": "📊",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	# Custom CSS: classes used by the header, metric cards, sentiment labels and audio box.
	# Kept in a named constant so the markup is separate from the call that injects it.
	_DASHBOARD_CSS = """
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	text-align: center;
	color: #2E86AB;
	margin-bottom: 2rem;
	}
	.metric-card {
	background-color: #f0f2f6;
	padding: 1rem;
	border-radius: 10px;
	border-left: 4px solid #2E86AB;
	}
	.sentiment-positive { color: #28a745; font-weight: bold; }
	.sentiment-negative { color: #dc3545; font-weight: bold; }
	.sentiment-neutral { color: #6c757d; font-weight: bold; }
	.audio-container {
	background-color: #f8f9fa;
	padding: 10px;
	border-radius: 5px;
	margin: 10px 0;
	}
	</style>
	"""
	st.markdown(_DASHBOARD_CSS, unsafe_allow_html=True)

	# Initialize session state.
	# The analyzer is handled on its own so NewsAnalyzer() is only constructed
	# when the key is actually missing.
	if 'analyzer' not in st.session_state:
	    st.session_state.analyzer = NewsAnalyzer()

	# Every other key receives a plain default value when it is absent.
	for _state_key, _state_default in (
	    ('results', None),
	    ('analysis_complete', False),
	    ('query', ''),
	    ('progress', 0),
	    ('progress_bar', None),
	    ('status_text', None),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	def main():
	    """Render the page: header, sidebar controls, analysis run and results.

	    Reads the sidebar inputs, and when the analyze button is pressed with a
	    non-empty query, runs ``st.session_state.analyzer.analyze_news`` and stores
	    the outcome in ``st.session_state.results``. Afterwards it shows either the
	    stored results, a hint to press the button, or the demo dashboard.
	    """
	    # Header
	    st.markdown('<h1 class="main-header">🌐 Global Business News Intelligence Dashboard</h1>', unsafe_allow_html=True)
	    st.markdown("Real-time sentiment analysis, multilingual summaries, and audio insights for business intelligence")

	    # Sidebar
	    with st.sidebar:
	        st.header("⚙️ Configuration")

	        # Input section
	        st.subheader("🎯 Target Analysis")
	        query_type = st.selectbox("Query Type", ["Company", "Stock Ticker", "Keyword", "Industry"])
	        query = st.text_input(f"Enter {query_type}:", placeholder="e.g., Tesla, TSLA, AI technology")

	        st.subheader("📊 Analysis Settings")
	        num_articles = st.slider("Number of Articles", 5, 50, 20)
	        languages = st.multiselect(
	            "Summary Languages",
	            ["English", "Hindi", "Tamil"],
	            default=["English"]
	        )
	        include_audio = st.checkbox("Generate Audio Summaries", True)

	        st.subheader("🔧 Model Settings")
	        sentiment_models = st.multiselect(
	            "Sentiment Models",
	            ["VADER", "Loughran-McDonald", "FinBERT"],
	            default=["VADER", "Loughran-McDonald", "FinBERT"]
	        )

	        # Analysis button
	        analyze_button = st.button("🚀 Analyze News", type="primary", use_container_width=True)

	    # Main content area
	    if analyze_button and query:
	        st.session_state.analysis_complete = False
	        with st.spinner("🔍 Analyzing news articles... This may take a few minutes."):
	            try:
	                # Create both UI placeholders BEFORE publishing them: the status
	                # placeholder used to be stored in session state ahead of its
	                # own assignment, which raised UnboundLocalError on every run.
	                progress_bar = st.progress(0)
	                status_text = st.empty()

	                # Store UI handles in session state for the progress callback
	                st.session_state.progress_bar = progress_bar
	                st.session_state.status_text = status_text

	                # Run analysis
	                config = {
	                    'query': query,
	                    'num_articles': num_articles,
	                    'languages': languages,
	                    'include_audio': include_audio,
	                    'sentiment_models': sentiment_models
	                }

	                # Update progress
	                status_text.text("🔍 Scraping articles...")
	                progress_bar.progress(20)

	                results = st.session_state.analyzer.analyze_news(config, progress_callback=update_progress)
	                st.session_state.results = results
	                st.session_state.analysis_complete = True

	                progress_bar.progress(100)
	                status_text.text("✅ Analysis complete!")

	            except Exception as e:
	                # Top-level UI boundary: report the failure instead of crashing the page.
	                st.error(f"Error during analysis: {str(e)}")
	                st.session_state.analysis_complete = False

	    # Display results
	    if st.session_state.analysis_complete and st.session_state.results:
	        display_results(st.session_state.results)

	    elif not st.session_state.analysis_complete and query:
	        st.info("👆 Click 'Analyze News' to start the analysis")

	    else:
	        show_demo_dashboard()

	def update_progress(progress, status):
	    """Progress callback: record *progress* and refresh the stored UI handles.

	    Args:
	        progress: Progress value; clamped to the 0-100 range before it is
	            passed to the stored progress bar.
	        status: Text written to the stored status placeholder.
	    """
	    try:
	        state = st.session_state
	        state.progress = progress

	        bar = state.progress_bar
	        if bar is not None:
	            clamped = max(0, min(100, progress))
	            bar.progress(int(clamped))

	        placeholder = state.status_text
	        if placeholder is not None:
	            placeholder.text(status)
	    except Exception:
	        # Best-effort by design: a failed UI refresh is ignored here.
	        pass



	def display_results(results):
	    """Display analysis results: headline metrics followed by the detail tabs.

	    Args:
	        results: Analysis result mapping. Reads ``results['query']``,
	            ``results['articles']`` (each with a ``source``),
	            ``results['summary']['average_sentiment']`` and the optional
	            ``results['languages']`` (defaults to ``['English']``).
	    """
	    st.header(f"📈 Analysis Results for: {results['query']}")

	    # Key metrics
	    col1, col2, col3, col4 = st.columns(4)

	    with col1:
	        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
	        st.metric("Articles Analyzed", len(results['articles']))
	        st.markdown('</div>', unsafe_allow_html=True)

	    with col2:
	        # The CSS class that used to be computed here was never used, so it is gone.
	        avg_sentiment = results['summary']['average_sentiment']
	        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
	        st.metric("Average Sentiment", f"{avg_sentiment:.3f}")
	        st.markdown('</div>', unsafe_allow_html=True)

	    with col3:
	        # Set comprehension: count distinct sources without a throwaway list.
	        distinct_sources = {article['source'] for article in results['articles']}
	        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
	        st.metric("Sources", len(distinct_sources))
	        st.markdown('</div>', unsafe_allow_html=True)

	    with col4:
	        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
	        st.metric("Languages", len(results.get('languages', ['English'])))
	        st.markdown('</div>', unsafe_allow_html=True)

	    # Tabs for different views
	    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["📊 Dashboard", "📰 Articles", "🎯 Sentiment", "🗣️ Audio", "📤 Export", "🔌 API"])

	    with tab1:
	        display_dashboard(results)

	    with tab2:
	        display_articles(results)

	    with tab3:
	        display_sentiment_analysis(results)

	    with tab4:
	        display_audio_summaries(results)

	    with tab5:
	        display_export_options(results)

	    with tab6:
	        display_api_info(results)

	def display_dashboard(results):
	    """Display the main dashboard: sentiment pie, source bars, timeline, key topics.

	    Args:
	        results: Analysis result mapping. Reads ``results['articles']`` (each
	            with ``source``, ``title``, ``sentiment['compound']`` and an
	            optional ``date``) and the optional ``results['keywords']`` list
	            whose entries provide ``keyword`` and ``score``.
	    """
	    from collections import Counter  # local import: keeps this block self-contained

	    articles = results['articles']
	    col1, col2 = st.columns(2)

	    with col1:
	        # Sentiment distribution
	        st.subheader("📊 Sentiment Distribution")
	        sentiment_counts = {
	            'Positive': sum(1 for article in articles if article['sentiment']['compound'] > 0.1),
	            'Negative': sum(1 for article in articles if article['sentiment']['compound'] < -0.1),
	            'Neutral': sum(1 for article in articles if -0.1 <= article['sentiment']['compound'] <= 0.1)
	        }
	        sentiment_names = list(sentiment_counts)

	        fig_pie = px.pie(
	            values=list(sentiment_counts.values()),
	            names=sentiment_names,
	            # color_discrete_map is keyed on the `color` values; without
	            # `color` the map is not applied and default colours are used.
	            color=sentiment_names,
	            color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'}
	        )
	        st.plotly_chart(fig_pie, use_container_width=True)

	    with col2:
	        # Source distribution
	        st.subheader("📰 Source Distribution")
	        # Counter keeps first-seen order, same as the manual dict tally it replaces.
	        source_counts = Counter(article['source'] for article in articles)

	        fig_bar = px.bar(
	            x=list(source_counts.keys()),
	            y=list(source_counts.values()),
	            color=list(source_counts.values()),
	            color_continuous_scale="viridis"
	        )
	        fig_bar.update_layout(xaxis_title="Source", yaxis_title="Article Count")
	        st.plotly_chart(fig_bar, use_container_width=True)

	    # Timeline chart
	    st.subheader("📈 Sentiment Over Time")
	    if articles:
	        # Only dated articles are plotted, so no fallback date is needed.
	        df_timeline = pd.DataFrame([
	            {
	                'date': article['date'],
	                'sentiment': article['sentiment']['compound'],
	                'title': article['title'][:50] + "..." if len(article['title']) > 50 else article['title']
	            }
	            for article in articles
	            if 'date' in article
	        ])

	        if not df_timeline.empty:
	            fig_timeline = px.scatter(
	                df_timeline,
	                x='date',
	                y='sentiment',
	                hover_data=['title'],
	                color='sentiment',
	                color_continuous_scale=['red', 'gray', 'green'],
	                color_continuous_midpoint=0
	            )
	            fig_timeline.update_layout(
	                xaxis_title="Date",
	                yaxis_title="Sentiment Score",
	                yaxis=dict(range=[-1, 1])
	            )
	            st.plotly_chart(fig_timeline, use_container_width=True)

	    # Keywords word cloud
	    st.subheader("🔤 Key Topics")
	    keywords = results.get('keywords')
	    if keywords:
	        col1, col2 = st.columns([2, 1])

	        with col1:
	            # Create word cloud
	            keywords_text = ' '.join(kw['keyword'] for kw in keywords[:50])
	            if keywords_text:
	                wordcloud = WordCloud(
	                    width=800,
	                    height=400,
	                    background_color='white',
	                    colormap='viridis'
	                ).generate(keywords_text)

	                fig, ax = plt.subplots(figsize=(10, 5))
	                ax.imshow(wordcloud, interpolation='bilinear')
	                ax.axis('off')
	                st.pyplot(fig)
	                # Release the figure; pyplot otherwise keeps every figure
	                # created on each rerun alive.
	                plt.close(fig)

	        with col2:
	            st.write("Top Keywords:")
	            for rank, kw in enumerate(keywords[:10], start=1):
	                st.write(f"{rank}. {kw['keyword']} ({kw['score']:.3f})")

	def display_articles(results):
	    """Render one expander per article with metadata, sentiment and summaries.

	    Args:
	        results: Analysis result mapping; reads ``results['articles']``. Each
	            article provides ``title``, ``source`` and ``sentiment`` and may
	            provide ``date``, ``url``, ``summary`` and ``summaries``.
	    """
	    # (sentiment key, label shown in the UI), in display order.
	    model_labels = (
	        ('vader', 'VADER'),
	        ('loughran_mcdonald', 'L&M'),
	        ('finbert', 'FinBERT'),
	    )

	    article_list = results['articles']
	    st.subheader(f"📰 Articles ({len(article_list)})")

	    for index, item in enumerate(article_list):
	        # The first three articles start expanded.
	        with st.expander(f"📄 {item['title']}", expanded=(index < 3)):
	            details_col, scores_col = st.columns([3, 1])

	            with details_col:
	                st.write(f"Source: {item['source']}")
	                if 'date' in item:
	                    st.write(f"Date: {item['date']}")
	                st.write(f"URL: {item.get('url', 'N/A')}")

	                # Classify the compound score with the +/-0.1 thresholds.
	                scores = item['sentiment']
	                if scores['compound'] > 0.1:
	                    verdict = "Positive"
	                elif scores['compound'] < -0.1:
	                    verdict = "Negative"
	                else:
	                    verdict = "Neutral"
	                css_class = f"sentiment-{verdict.lower()}"
	                st.markdown(f"Sentiment: <span class='{css_class}'>{verdict} ({scores['compound']:.3f})</span>", unsafe_allow_html=True)

	            with scores_col:
	                # Per-model scores, shown only for the models that are present.
	                st.write("Model Scores:")
	                for key, shown_name in model_labels:
	                    if key in scores:
	                        st.write(f"{shown_name}: {scores[key]:.3f}")

	            # Primary summary
	            if 'summary' in item:
	                st.write("Summary:")
	                st.write(item['summary'])

	            # Summaries in other languages; English is skipped here.
	            if 'summaries' in item:
	                for language, text in item['summaries'].items():
	                    if language == 'English':
	                        continue
	                    st.write(f"Summary ({language}):")
	                    st.write(text)

	def display_sentiment_analysis(results):
	    """Display detailed sentiment analysis: model table, correlations, extremes.

	    Titles are shortened with an ellipsis only when they were actually cut,
	    matching how the timeline chart truncates titles.

	    Args:
	        results: Analysis result mapping; reads ``results['articles']``, each
	            with ``title`` and ``sentiment`` (``compound`` plus optional
	            ``vader``, ``loughran_mcdonald`` and ``finbert`` scores).
	    """
	    def _shorten(title, limit):
	        # Append "..." only when the title exceeds the limit.
	        return title[:limit] + "..." if len(title) > limit else title

	    st.subheader("🎯 Detailed Sentiment Analysis")

	    articles = results['articles']
	    if not articles:
	        # Nothing to tabulate or rank; say so instead of rendering empty sections.
	        st.info("No articles available for sentiment analysis.")
	        return

	    # Model comparison
	    model_data = []
	    for article in articles:
	        sentiment = article['sentiment']
	        row = {'title': _shorten(article['title'], 30)}
	        if 'vader' in sentiment:
	            row['VADER'] = sentiment['vader']
	        if 'loughran_mcdonald' in sentiment:
	            row['Loughran-McDonald'] = sentiment['loughran_mcdonald']
	        if 'finbert' in sentiment:
	            row['FinBERT'] = sentiment['finbert']
	        row['Final Score'] = sentiment['compound']
	        model_data.append(row)

	    df_models = pd.DataFrame(model_data)
	    st.write("Model Comparison:")
	    st.dataframe(df_models, use_container_width=True)

	    # Correlation heatmap (needs at least two score columns)
	    numeric_cols = [col for col in df_models.columns if col != 'title']
	    if len(numeric_cols) > 1:
	        corr_matrix = df_models[numeric_cols].corr()
	        fig_heatmap = px.imshow(
	            corr_matrix,
	            text_auto=True,
	            aspect="auto",
	            color_continuous_scale="RdBu_r",
	            color_continuous_midpoint=0
	        )
	        fig_heatmap.update_layout(title="Model Correlation Matrix")
	        st.plotly_chart(fig_heatmap, use_container_width=True)

	    # Top positive and negative articles
	    col1, col2 = st.columns(2)

	    with col1:
	        st.write("Most Positive Articles:")
	        positive_articles = sorted(
	            articles,
	            key=lambda x: x['sentiment']['compound'],
	            reverse=True
	        )[:5]

	        for article in positive_articles:
	            st.write(f"• {_shorten(article['title'], 50)} ({article['sentiment']['compound']:.3f})")

	    with col2:
	        st.write("Most Negative Articles:")
	        negative_articles = sorted(
	            articles,
	            key=lambda x: x['sentiment']['compound']
	        )[:5]

	        for article in negative_articles:
	            st.write(f"• {_shorten(article['title'], 50)} ({article['sentiment']['compound']:.3f})")

	def display_audio_summaries(results):
	    """Display an audio player for each language summary.

	    Args:
	        results: Analysis result mapping; reads the optional
	            ``results['audio_files']`` mapping of language name to file path.
	    """
	    st.subheader("🎵 Audio Summaries")

	    audio_files = results.get('audio_files')
	    if not audio_files:
	        # Covers both a missing key and an empty mapping, which previously
	        # rendered nothing at all.
	        st.info("No audio summaries available. Enable audio generation in settings.")
	        return

	    for lang, audio_path in audio_files.items():
	        st.write(f"{lang} Summary:")

	        # Open directly rather than checking existence first: this avoids the
	        # check-then-open race and also handles a None or otherwise invalid path.
	        try:
	            with open(audio_path, 'rb') as audio_file_obj:
	                audio_bytes = audio_file_obj.read()
	        except (OSError, TypeError, ValueError):
	            st.write("Audio file not found")
	        else:
	            st.audio(audio_bytes, format='audio/mp3')

	def display_export_options(results):
	    """Render the CSV, JSON and PDF export controls in three columns.

	    Each export is prepared only after its button is pressed; a download
	    button for the prepared data is then shown.

	    Args:
	        results: Analysis result mapping passed on to the exporters.
	    """
	    def _timestamp():
	        # Minute-resolution stamp embedded in every export file name.
	        return datetime.now().strftime('%Y%m%d_%H%M')

	    st.subheader("📤 Export Results")

	    csv_col, json_col, pdf_col = st.columns(3)

	    # CSV export
	    with csv_col:
	        csv_requested = st.button("📊 Download CSV", use_container_width=True)
	        if csv_requested:
	            st.download_button(
	                label="Click to Download CSV",
	                data=prepare_csv_export(results),
	                file_name=f"news_analysis_{_timestamp()}.csv",
	                mime="text/csv"
	            )

	    # JSON export
	    with json_col:
	        json_requested = st.button("📋 Download JSON", use_container_width=True)
	        if json_requested:
	            st.download_button(
	                label="Click to Download JSON",
	                data=json.dumps(results, indent=2, default=str),
	                file_name=f"news_analysis_{_timestamp()}.json",
	                mime="application/json"
	            )

	    # PDF report
	    with pdf_col:
	        pdf_requested = st.button("📄 Generate PDF Report", use_container_width=True)
	        if pdf_requested:
	            try:
	                st.download_button(
	                    label="Click to Download PDF",
	                    data=generate_pdf_report(results),
	                    file_name=f"news_analysis_report_{_timestamp()}.pdf",
	                    mime="application/pdf"
	                )
	            except Exception as e:
	                st.error(f"Error generating PDF: {str(e)}")

	def display_api_info(results):
	    """Display API information with an example request and sample response.

	    The example URL encodes the query so that values containing spaces or
	    reserved characters (e.g. "AI technology") still form a valid request.

	    Args:
	        results: Analysis result mapping; reads ``results['query']``,
	            ``results['articles']`` and ``results['summary']['average_sentiment']``.
	    """
	    from urllib.parse import quote_plus  # local import: keeps this block self-contained

	    st.subheader("🔌 API Access")

	    st.write("Endpoint: `/api/analyze`")
	    st.write("Method: GET")
	    st.write("Parameters:")
	    st.code("""
	- query: string (required) - Company name, ticker, or keyword
	- num_articles: integer (default: 20) - Number of articles to analyze
	- languages: array (default: ["English"]) - Summary languages
	- include_audio: boolean (default: true) - Generate audio summaries
	- sentiment_models: array (default: ["VADER", "Loughran-McDonald", "FinBERT"]) - Models to use
	""")

	    st.write("Example Request:")
	    encoded_query = quote_plus(str(results['query']))
	    st.code(f"GET /api/analyze?query={encoded_query}&num_articles=20")

	    st.write("Sample Response:")
	    sample_response = {
	        "query": results['query'],
	        "total_articles": len(results['articles']),
	        "average_sentiment": results['summary']['average_sentiment'],
	        "articles": results['articles'][:2]  # Show first 2 articles as example
	    }
	    st.json(sample_response)

	def prepare_csv_export(results):
	    """Flatten the analysed articles into CSV text, one row per article.

	    Args:
	        results: Analysis result mapping; reads ``results['articles']``. Each
	            article provides ``title``, ``source`` and ``sentiment`` and may
	            provide ``url``, ``date`` and ``summary`` (blank when absent).

	    Returns:
	        The CSV document as a string, without an index column.
	    """
	    # (sentiment key, CSV column); a column is added only when its score exists.
	    model_columns = (
	        ('vader', 'vader_score'),
	        ('loughran_mcdonald', 'loughran_mcdonald_score'),
	        ('finbert', 'finbert_score'),
	    )

	    records = []
	    for item in results['articles']:
	        record = {
	            'title': item['title'],
	            'source': item['source'],
	            'url': item.get('url', ''),
	            'date': item.get('date', ''),
	        }

	        scores = item['sentiment']
	        compound = scores['compound']
	        record['sentiment_compound'] = compound

	        # Label with the +/-0.1 thresholds.
	        if compound > 0.1:
	            record['sentiment_label'] = 'Positive'
	        elif compound < -0.1:
	            record['sentiment_label'] = 'Negative'
	        else:
	            record['sentiment_label'] = 'Neutral'

	        record['summary'] = item.get('summary', '')

	        for key, column in model_columns:
	            if key in scores:
	                record[column] = scores[key]

	        records.append(record)

	    return pd.DataFrame(records).to_csv(index=False)

	def show_demo_dashboard():
	    """Show the landing page: feature overview plus a sample chart and metrics.

	    All figures shown here are hard-coded sample values, not analysis output.
	    """
	    st.header("🚀 Welcome to Global Business News Intelligence")

	    st.markdown("""
	### Key Features:
	- 🔍 Multi-Source News Scraping: Aggregates news from reliable sources
	- 🎯 Advanced Sentiment Analysis: Uses VADER, Loughran-McDonald, and FinBERT models
	- 🌐 Multilingual Support: Summaries in English, Hindi, and Tamil
	- 🎵 Audio Generation: Text-to-speech for all language summaries
	- 📊 Interactive Dashboard: Real-time charts and visualizations
	- 📤 Multiple Export Formats: CSV, JSON, and PDF reports
	- 🔌 API Access: Programmatic access to all features

	### Use Cases:
	- 📈 Investment Research: Track sentiment around stocks and companies
	- 🏢 Brand Monitoring: Monitor public perception of your brand
	- 🔍 Market Intelligence: Stay informed about industry trends
	- 📰 Media Analysis: Analyze coverage patterns across sources
	- 🌍 Global Insights: Access news in multiple languages

	### Get Started:
	1. Enter a company name, stock ticker, or keyword in the sidebar
	2. Configure your analysis settings
	3. Click "Analyze News" to start
	4. Explore results in the interactive dashboard
	5. Export your findings in multiple formats
	""")

	    # Sample visualization
	    st.subheader("📊 Sample Analysis Dashboard")

	    # Create sample data
	    sample_data = {
	        'Sentiment': ['Positive', 'Negative', 'Neutral'],
	        'Count': [45, 15, 40]
	    }

	    fig = px.pie(
	        values=sample_data['Count'],
	        names=sample_data['Sentiment'],
	        # color_discrete_map is keyed on the `color` values; without `color`
	        # the map is not applied and default colours are used.
	        color=sample_data['Sentiment'],
	        color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'},
	        title="Sample Sentiment Distribution"
	    )

	    col1, col2 = st.columns([1, 1])
	    with col1:
	        st.plotly_chart(fig, use_container_width=True)

	    with col2:
	        st.write("Sample Metrics:")
	        st.metric("Articles Analyzed", "100")
	        st.metric("Average Sentiment", "0.234")
	        st.metric("Sources Covered", "15")
	        st.metric("Languages", "3")

	# Script entry point: build the page only when this file is run directly.
	if __name__ == "__main__":
	    main()