wekey1998 committed on
Commit
aa675fc
·
verified ·
1 Parent(s): c844811

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +248 -565
app.py CHANGED
@@ -1,583 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
4
- import plotly.graph_objects as go
5
- from wordcloud import WordCloud
6
- import matplotlib.pyplot as plt
7
- import asyncio
8
- import json
9
- import base64
10
- from datetime import datetime
11
- import io
12
- import os
13
 
14
- # Import our modules
15
- from api import NewsAnalyzer
16
- from utils import load_config, cache_results
17
- from report import generate_pdf_report
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # Configure page
20
  st.set_page_config(
21
- page_title="Global Business News Intelligence Dashboard",
22
- page_icon="📊",
23
  layout="wide",
24
- initial_sidebar_state="expanded"
25
  )
26
 
27
- # Custom CSS
28
- st.markdown("""
29
- <style>
30
- .main-header {
31
- font-size: 2.5rem;
32
- font-weight: bold;
33
- text-align: center;
34
- color: #2E86AB;
35
- margin-bottom: 2rem;
36
- }
37
- .metric-card {
38
- background-color: #f0f2f6;
39
- padding: 1rem;
40
- border-radius: 10px;
41
- border-left: 4px solid #2E86AB;
42
- }
43
- .sentiment-positive { color: #28a745; font-weight: bold; }
44
- .sentiment-negative { color: #dc3545; font-weight: bold; }
45
- .sentiment-neutral { color: #6c757d; font-weight: bold; }
46
- .audio-container {
47
- background-color: #f8f9fa;
48
- padding: 10px;
49
- border-radius: 5px;
50
- margin: 10px 0;
51
- }
52
- </style>
53
- """, unsafe_allow_html=True)
54
-
55
- # Initialize session state
56
- if 'analyzer' not in st.session_state:
57
- st.session_state.analyzer = NewsAnalyzer()
58
- if 'results' not in st.session_state:
59
- st.session_state.results = None
60
- if 'analysis_complete' not in st.session_state:
61
- st.session_state.analysis_complete = False
62
-
63
- # Ensure extra session keys exist
64
- if 'query' not in st.session_state:
65
- st.session_state.query = ''
66
- if 'progress' not in st.session_state:
67
- st.session_state.progress = 0
68
- if 'progress_bar' not in st.session_state:
69
- st.session_state.progress_bar = None
70
- if 'status_text' not in st.session_state:
71
- st.session_state.status_text = None
72
-
73
- def main():
74
- # Header
75
- st.markdown('<h1 class="main-header">🌐 Global Business News Intelligence Dashboard</h1>', unsafe_allow_html=True)
76
- st.markdown("**Real-time sentiment analysis, multilingual summaries, and audio insights for business intelligence**")
77
-
78
- # Sidebar
79
- with st.sidebar:
80
- st.header("⚙️ Configuration")
81
-
82
- # Input section
83
- st.subheader("🎯 Target Analysis")
84
- query_type = st.selectbox("Query Type", ["Company", "Stock Ticker", "Keyword", "Industry"])
85
- query = st.text_input(f"Enter {query_type}:", placeholder="e.g., Tesla, TSLA, AI technology")
86
-
87
- st.subheader("📊 Analysis Settings")
88
- num_articles = st.slider("Number of Articles", 5, 50, 20)
89
- languages = st.multiselect(
90
- "Summary Languages",
91
- ["English", "Hindi", "Tamil"],
92
- default=["English"]
93
- )
94
- include_audio = st.checkbox("Generate Audio Summaries", True)
95
-
96
- st.subheader("🔧 Model Settings")
97
- sentiment_models = st.multiselect(
98
- "Sentiment Models",
99
- ["VADER", "Loughran-McDonald", "FinBERT"],
100
- default=["VADER", "Loughran-McDonald", "FinBERT"]
101
  )
102
-
103
- # Analysis button
104
- analyze_button = st.button("🚀 Analyze News", type="primary", use_container_width=True)
105
-
106
- # Main content area
107
- if analyze_button and query:
108
- st.session_state.analysis_complete = False
109
- with st.spinner("🔍 Analyzing news articles... This may take a few minutes."):
110
- try:
111
- # Create progress UI
112
- progress_bar = st.progress(0)
113
- status_text = st.empty()
114
- # Store UI handles in session state for callbacks
115
- st.session_state.progress_bar = progress_bar
116
- st.session_state.status_text = status_text
117
-
118
- # Run analysis
119
- config = {
120
- 'query': query,
121
- 'num_articles': num_articles,
122
- 'languages': languages,
123
- 'include_audio': include_audio,
124
- 'sentiment_models': sentiment_models
125
- }
126
-
127
- # Update progress
128
- status_text.text("🔍 Scraping articles...")
129
- progress_bar.progress(20)
130
-
131
- results = st.session_state.analyzer.analyze_news(config, progress_callback=update_progress)
132
- st.session_state.results = results
133
- st.session_state.analysis_complete = True
134
-
135
- progress_bar.progress(100)
136
- status_text.text("✅ Analysis complete!")
137
-
138
- except Exception as e:
139
- st.error(f"Error during analysis: {str(e)}")
140
- st.session_state.analysis_complete = False
141
-
142
- # Display results
143
- if st.session_state.analysis_complete and st.session_state.results:
144
- display_results(st.session_state.results)
145
-
146
- elif not st.session_state.analysis_complete and query:
147
- st.info("👆 Click 'Analyze News' to start the analysis")
148
-
149
- else:
150
- show_demo_dashboard()
151
-
152
- def update_progress(progress, status):
153
- """Callback function for progress updates"""
154
- try:
155
- st.session_state.progress = progress
156
- if st.session_state.progress_bar is not None:
157
- st.session_state.progress_bar.progress(int(max(0, min(100, progress))))
158
- if st.session_state.status_text is not None:
159
- st.session_state.status_text.text(status)
160
- except Exception:
161
- pass
162
-
163
-
164
-
165
- def display_results(results):
166
- """Display analysis results with interactive dashboard"""
167
- st.header(f"📈 Analysis Results for: {results['query']}")
168
-
169
- # Key metrics
170
- col1, col2, col3, col4 = st.columns(4)
171
-
172
- with col1:
173
- st.markdown('<div class="metric-card">', unsafe_allow_html=True)
174
- st.metric("Articles Analyzed", len(results['articles']))
175
- st.markdown('</div>', unsafe_allow_html=True)
176
-
177
- with col2:
178
- avg_sentiment = results['summary']['average_sentiment']
179
- sentiment_color = "sentiment-positive" if avg_sentiment > 0.1 else "sentiment-negative" if avg_sentiment < -0.1 else "sentiment-neutral"
180
- st.markdown('<div class="metric-card">', unsafe_allow_html=True)
181
- st.metric("Average Sentiment", f"{avg_sentiment:.3f}")
182
- st.markdown('</div>', unsafe_allow_html=True)
183
-
184
- with col3:
185
- st.markdown('<div class="metric-card">', unsafe_allow_html=True)
186
- st.metric("Sources", len(set([article['source'] for article in results['articles']])))
187
- st.markdown('</div>', unsafe_allow_html=True)
188
-
189
- with col4:
190
- st.markdown('<div class="metric-card">', unsafe_allow_html=True)
191
- st.metric("Languages", len(results.get('languages', ['English'])))
192
- st.markdown('</div>', unsafe_allow_html=True)
193
-
194
- # Tabs for different views
195
- tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["📊 Dashboard", "📰 Articles", "🎯 Sentiment", "🗣️ Audio", "📤 Export", "🔌 API"])
196
-
197
- with tab1:
198
- display_dashboard(results)
199
-
200
- with tab2:
201
- display_articles(results)
202
-
203
- with tab3:
204
- display_sentiment_analysis(results)
205
-
206
- with tab4:
207
- display_audio_summaries(results)
208
-
209
- with tab5:
210
- display_export_options(results)
211
-
212
- with tab6:
213
- display_api_info(results)
214
-
215
- def display_dashboard(results):
216
- """Display main dashboard with charts"""
217
- col1, col2 = st.columns(2)
218
-
219
- with col1:
220
- # Sentiment distribution
221
- st.subheader("📊 Sentiment Distribution")
222
- sentiment_counts = {
223
- 'Positive': sum(1 for article in results['articles'] if article['sentiment']['compound'] > 0.1),
224
- 'Negative': sum(1 for article in results['articles'] if article['sentiment']['compound'] < -0.1),
225
- 'Neutral': sum(1 for article in results['articles'] if -0.1 <= article['sentiment']['compound'] <= 0.1)
226
  }
227
-
228
- fig_pie = px.pie(
229
- values=list(sentiment_counts.values()),
230
- names=list(sentiment_counts.keys()),
231
- color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'}
 
 
 
 
 
 
 
 
 
 
 
 
232
  )
233
- st.plotly_chart(fig_pie, use_container_width=True)
234
-
235
- with col2:
236
- # Source distribution
237
- st.subheader("📰 Source Distribution")
238
- source_counts = {}
239
- for article in results['articles']:
240
- source = article['source']
241
- source_counts[source] = source_counts.get(source, 0) + 1
242
-
243
- fig_bar = px.bar(
244
- x=list(source_counts.keys()),
245
- y=list(source_counts.values()),
246
- color=list(source_counts.values()),
247
- color_continuous_scale="viridis"
248
  )
249
- fig_bar.update_layout(xaxis_title="Source", yaxis_title="Article Count")
250
- st.plotly_chart(fig_bar, use_container_width=True)
251
-
252
- # Timeline chart
253
- st.subheader("📈 Sentiment Over Time")
254
- if results['articles']:
255
- df_timeline = pd.DataFrame([
256
- {
257
- 'date': article.get('date', datetime.now()),
258
- 'sentiment': article['sentiment']['compound'],
259
- 'title': article['title'][:50] + "..." if len(article['title']) > 50 else article['title']
260
- }
261
- for article in results['articles']
262
- if 'date' in article
263
- ])
264
-
265
- if not df_timeline.empty:
266
- fig_timeline = px.scatter(
267
- df_timeline,
268
- x='date',
269
- y='sentiment',
270
- hover_data=['title'],
271
- color='sentiment',
272
- color_continuous_scale=['red', 'gray', 'green'],
273
- color_continuous_midpoint=0
274
- )
275
- fig_timeline.update_layout(
276
- xaxis_title="Date",
277
- yaxis_title="Sentiment Score",
278
- yaxis=dict(range=[-1, 1])
279
- )
280
- st.plotly_chart(fig_timeline, use_container_width=True)
281
-
282
- # Keywords word cloud
283
- st.subheader("🔤 Key Topics")
284
- if 'keywords' in results and results['keywords']:
285
- col1, col2 = st.columns([2, 1])
286
-
287
- with col1:
288
- # Create word cloud
289
- keywords_text = ' '.join([kw['keyword'] for kw in results['keywords'][:50]])
290
- if keywords_text:
291
- wordcloud = WordCloud(
292
- width=800,
293
- height=400,
294
- background_color='white',
295
- colormap='viridis'
296
- ).generate(keywords_text)
297
-
298
- fig, ax = plt.subplots(figsize=(10, 5))
299
- ax.imshow(wordcloud, interpolation='bilinear')
300
- ax.axis('off')
301
- st.pyplot(fig)
302
-
303
- with col2:
304
- st.write("**Top Keywords:**")
305
- for i, kw in enumerate(results['keywords'][:10]):
306
- st.write(f"{i+1}. {kw['keyword']} ({kw['score']:.3f})")
307
-
308
- def display_articles(results):
309
- """Display individual articles with summaries"""
310
- st.subheader(f"📰 Articles ({len(results['articles'])})")
311
-
312
- for i, article in enumerate(results['articles']):
313
- with st.expander(f"📄 {article['title']}", expanded=(i < 3)):
314
- col1, col2 = st.columns([3, 1])
315
-
316
- with col1:
317
- st.write(f"**Source:** {article['source']}")
318
- if 'date' in article:
319
- st.write(f"**Date:** {article['date']}")
320
- st.write(f"**URL:** {article.get('url', 'N/A')}")
321
-
322
- # Sentiment
323
- sentiment = article['sentiment']
324
- sentiment_label = "Positive" if sentiment['compound'] > 0.1 else "Negative" if sentiment['compound'] < -0.1 else "Neutral"
325
- sentiment_color = "sentiment-positive" if sentiment_label == "Positive" else "sentiment-negative" if sentiment_label == "Negative" else "sentiment-neutral"
326
- st.markdown(f"**Sentiment:** <span class='{sentiment_color}'>{sentiment_label} ({sentiment['compound']:.3f})</span>", unsafe_allow_html=True)
327
-
328
- with col2:
329
- # Model-specific scores
330
- st.write("**Model Scores:**")
331
- if 'vader' in sentiment:
332
- st.write(f"VADER: {sentiment['vader']:.3f}")
333
- if 'loughran_mcdonald' in sentiment:
334
- st.write(f"L&M: {sentiment['loughran_mcdonald']:.3f}")
335
- if 'finbert' in sentiment:
336
- st.write(f"FinBERT: {sentiment['finbert']:.3f}")
337
-
338
- # Summary
339
- if 'summary' in article:
340
- st.write("**Summary:**")
341
- st.write(article['summary'])
342
-
343
- # Multilingual summaries
344
- if 'summaries' in article:
345
- for lang, summary in article['summaries'].items():
346
- if lang != 'English':
347
- st.write(f"**Summary ({lang}):**")
348
- st.write(summary)
349
-
350
- def display_sentiment_analysis(results):
351
- """Display detailed sentiment analysis"""
352
- st.subheader("🎯 Detailed Sentiment Analysis")
353
-
354
- # Model comparison
355
- if results['articles']:
356
- model_data = []
357
- for article in results['articles']:
358
- sentiment = article['sentiment']
359
- row = {'title': article['title'][:30] + "..."}
360
- if 'vader' in sentiment:
361
- row['VADER'] = sentiment['vader']
362
- if 'loughran_mcdonald' in sentiment:
363
- row['Loughran-McDonald'] = sentiment['loughran_mcdonald']
364
- if 'finbert' in sentiment:
365
- row['FinBERT'] = sentiment['finbert']
366
- row['Final Score'] = sentiment['compound']
367
- model_data.append(row)
368
-
369
- df_models = pd.DataFrame(model_data)
370
- st.write("**Model Comparison:**")
371
- st.dataframe(df_models, use_container_width=True)
372
-
373
- # Correlation heatmap
374
- numeric_cols = [col for col in df_models.columns if col != 'title']
375
- if len(numeric_cols) > 1:
376
- corr_matrix = df_models[numeric_cols].corr()
377
- fig_heatmap = px.imshow(
378
- corr_matrix,
379
- text_auto=True,
380
- aspect="auto",
381
- color_continuous_scale="RdBu_r",
382
- color_continuous_midpoint=0
383
- )
384
- fig_heatmap.update_layout(title="Model Correlation Matrix")
385
- st.plotly_chart(fig_heatmap, use_container_width=True)
386
-
387
- # Top positive and negative articles
388
- col1, col2 = st.columns(2)
389
-
390
- with col1:
391
- st.write("**Most Positive Articles:**")
392
- positive_articles = sorted(
393
- results['articles'],
394
- key=lambda x: x['sentiment']['compound'],
395
- reverse=True
396
- )[:5]
397
-
398
- for article in positive_articles:
399
- st.write(f"• {article['title'][:50]}... ({article['sentiment']['compound']:.3f})")
400
-
401
- with col2:
402
- st.write("**Most Negative Articles:**")
403
- negative_articles = sorted(
404
- results['articles'],
405
- key=lambda x: x['sentiment']['compound']
406
- )[:5]
407
-
408
- for article in negative_articles:
409
- st.write(f"• {article['title'][:50]}... ({article['sentiment']['compound']:.3f})")
410
-
411
- def display_audio_summaries(results):
412
- """Display audio summaries for different languages"""
413
- st.subheader("🎵 Audio Summaries")
414
-
415
- if 'audio_files' in results:
416
- for lang, audio_file in results['audio_files'].items():
417
- st.write(f"**{lang} Summary:**")
418
-
419
- # Create audio player
420
- if os.path.exists(audio_file):
421
- with open(audio_file, 'rb') as audio_file_obj:
422
- audio_bytes = audio_file_obj.read()
423
- st.audio(audio_bytes, format='audio/mp3')
424
  else:
425
- st.write("Audio file not found")
426
- else:
427
- st.info("No audio summaries available. Enable audio generation in settings.")
428
-
429
- def display_export_options(results):
430
- """Display export options"""
431
- st.subheader("📤 Export Results")
432
-
433
- col1, col2, col3 = st.columns(3)
434
-
435
- with col1:
436
- # CSV Export
437
- if st.button("📊 Download CSV", use_container_width=True):
438
- csv_data = prepare_csv_export(results)
439
- st.download_button(
440
- label="Click to Download CSV",
441
- data=csv_data,
442
- file_name=f"news_analysis_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
443
- mime="text/csv"
444
- )
445
-
446
- with col2:
447
- # JSON Export
448
- if st.button("📋 Download JSON", use_container_width=True):
449
- json_data = json.dumps(results, indent=2, default=str)
450
  st.download_button(
451
- label="Click to Download JSON",
452
- data=json_data,
453
- file_name=f"news_analysis_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
454
- mime="application/json"
 
455
  )
456
-
457
- with col3:
458
- # PDF Report
459
- if st.button("📄 Generate PDF Report", use_container_width=True):
 
 
 
 
 
 
 
 
460
  try:
461
- pdf_buffer = generate_pdf_report(results)
462
- st.download_button(
463
- label="Click to Download PDF",
464
- data=pdf_buffer,
465
- file_name=f"news_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M')}.pdf",
466
- mime="application/pdf"
467
- )
468
- except Exception as e:
469
- st.error(f"Error generating PDF: {str(e)}")
470
-
471
- def display_api_info(results):
472
- """Display API information and examples"""
473
- st.subheader("🔌 API Access")
474
-
475
- st.write("**Endpoint:** `/api/analyze`")
476
- st.write("**Method:** GET")
477
- st.write("**Parameters:**")
478
- st.code("""
479
- - query: string (required) - Company name, ticker, or keyword
480
- - num_articles: integer (default: 20) - Number of articles to analyze
481
- - languages: array (default: ["English"]) - Summary languages
482
- - include_audio: boolean (default: true) - Generate audio summaries
483
- - sentiment_models: array (default: ["VADER", "Loughran-McDonald", "FinBERT"]) - Models to use
484
- """)
485
-
486
- st.write("**Example Request:**")
487
- st.code(f"GET /api/analyze?query={results['query']}&num_articles=20")
488
-
489
- st.write("**Sample Response:**")
490
- sample_response = {
491
- "query": results['query'],
492
- "total_articles": len(results['articles']),
493
- "average_sentiment": results['summary']['average_sentiment'],
494
- "articles": results['articles'][:2] # Show first 2 articles as example
495
- }
496
- st.json(sample_response)
497
-
498
- def prepare_csv_export(results):
499
- """Prepare CSV data for export"""
500
- csv_data = []
501
-
502
- for article in results['articles']:
503
- row = {
504
- 'title': article['title'],
505
- 'source': article['source'],
506
- 'url': article.get('url', ''),
507
- 'date': article.get('date', ''),
508
- 'sentiment_compound': article['sentiment']['compound'],
509
- 'sentiment_label': 'Positive' if article['sentiment']['compound'] > 0.1 else 'Negative' if article['sentiment']['compound'] < -0.1 else 'Neutral',
510
- 'summary': article.get('summary', '')
511
- }
512
-
513
- # Add model-specific scores
514
- if 'vader' in article['sentiment']:
515
- row['vader_score'] = article['sentiment']['vader']
516
- if 'loughran_mcdonald' in article['sentiment']:
517
- row['loughran_mcdonald_score'] = article['sentiment']['loughran_mcdonald']
518
- if 'finbert' in article['sentiment']:
519
- row['finbert_score'] = article['sentiment']['finbert']
520
-
521
- csv_data.append(row)
522
-
523
- df = pd.DataFrame(csv_data)
524
- return df.to_csv(index=False)
525
-
526
- def show_demo_dashboard():
527
- """Show demo dashboard with sample data"""
528
- st.header("🚀 Welcome to Global Business News Intelligence")
529
-
530
- st.markdown("""
531
- ### Key Features:
532
- - **🔍 Multi-Source News Scraping:** Aggregates news from reliable sources
533
- - **🎯 Advanced Sentiment Analysis:** Uses VADER, Loughran-McDonald, and FinBERT models
534
- - **🌐 Multilingual Support:** Summaries in English, Hindi, and Tamil
535
- - **🎵 Audio Generation:** Text-to-speech for all language summaries
536
- - **📊 Interactive Dashboard:** Real-time charts and visualizations
537
- - **📤 Multiple Export Formats:** CSV, JSON, and PDF reports
538
- - **🔌 API Access:** Programmatic access to all features
539
-
540
- ### Use Cases:
541
- - **📈 Investment Research:** Track sentiment around stocks and companies
542
- - **🏢 Brand Monitoring:** Monitor public perception of your brand
543
- - **🔍 Market Intelligence:** Stay informed about industry trends
544
- - **📰 Media Analysis:** Analyze coverage patterns across sources
545
- - **🌍 Global Insights:** Access news in multiple languages
546
-
547
- ### Get Started:
548
- 1. Enter a company name, stock ticker, or keyword in the sidebar
549
- 2. Configure your analysis settings
550
- 3. Click "Analyze News" to start
551
- 4. Explore results in the interactive dashboard
552
- 5. Export your findings in multiple formats
553
- """)
554
-
555
- # Sample visualization
556
- st.subheader("📊 Sample Analysis Dashboard")
557
-
558
- # Create sample data
559
- sample_data = {
560
- 'Sentiment': ['Positive', 'Negative', 'Neutral'],
561
- 'Count': [45, 15, 40]
562
  }
563
-
564
- fig = px.pie(
565
- values=sample_data['Count'],
566
- names=sample_data['Sentiment'],
567
- color_discrete_map={'Positive': '#28a745', 'Negative': '#dc3545', 'Neutral': '#6c757d'},
568
- title="Sample Sentiment Distribution"
569
- )
570
-
571
- col1, col2 = st.columns([1, 1])
572
- with col1:
573
- st.plotly_chart(fig, use_container_width=True)
574
-
575
- with col2:
576
- st.write("**Sample Metrics:**")
577
- st.metric("Articles Analyzed", "100")
578
- st.metric("Average Sentiment", "0.234")
579
- st.metric("Sources Covered", "15")
580
- st.metric("Languages", "3")
581
-
582
- if __name__ == "__main__":
583
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ """
3
+ Streamlit UI for the News Sentiment Analyzer.
4
+ - Calls the in-process FastAPI orchestrator (NewsAnalyzer) directly for zero-latency on Spaces.
5
+ - Lightweight, CPU-safe widgets with progress, charts, tables, and exports (CSV/JSON/PDF + Audio).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import io
11
+ import json
12
+ import logging
13
+ from datetime import datetime
14
+ from typing import Any, Dict, List
15
+
16
  import streamlit as st
17
  import pandas as pd
18
  import plotly.express as px
 
 
 
 
 
 
 
 
 
19
 
20
+ # Local modules
21
+ from api import analyzer # global NewsAnalyzer instance
22
+ from utils import (
23
+ setup_logging,
24
+ load_config,
25
+ calculate_sentiment_distribution,
26
+ format_number,
27
+ )
28
+ from report import generate_pdf_report # your existing PDF generator
29
+
30
+ # ------------------------------------------------------------------------------
31
+ # App setup
32
+ # ------------------------------------------------------------------------------
33
+
34
# Configure logging first so everything below can log through the "app" logger.
setup_logging()
logger = logging.getLogger("app")

# Page-level configuration; issued before any other Streamlit element is drawn.
st.set_page_config(
    page_title="News Sentiment Analyzer",
    page_icon="📰",
    layout="wide",
)

# Minimal CSS polish (helper text classes, progress-bar colour, top padding).
st.markdown(
    """
<style>
.small { font-size: 0.85rem; color: #666; }
.ok { color: #1b8a5a; }
.bad { color: #b00020; }
.neutral { color: #666; }
.stProgress > div > div > div { background-color: #4b8bf4; }
.block-container { padding-top: 2rem; }
</style>
""",
    unsafe_allow_html=True,
)

# ------------------------------------------------------------------------------
# Sidebar controls
# ------------------------------------------------------------------------------

# NOTE(review): `cfg` is not read anywhere in the visible code; the call is kept
# in case load_config() has side effects — confirm before removing.
cfg = load_config()

st.sidebar.header("Settings")
default_query = st.sidebar.text_input("Company / Keyword", value="Tesla")
num_articles = st.sidebar.slider("Number of articles", 5, 50, 20, step=1)
languages = st.sidebar.multiselect(
    "Summaries in languages",
    options=["English", "Hindi", "Tamil"],
    default=["English"],
)
include_audio = st.sidebar.checkbox("Generate audio summary", value=True)
sentiment_models = st.sidebar.multiselect(
    "Sentiment models",
    options=["VADER", "Loughran-McDonald", "FinBERT"],
    default=["VADER", "Loughran-McDonald", "FinBERT"],
)
st.sidebar.caption("Tip: disable FinBERT if your Space has < 2GB RAM.")

# True only on the script run triggered by clicking the button.
run_btn = st.sidebar.button("Analyze", use_container_width=True, type="primary")

# ------------------------------------------------------------------------------
# Header
# ------------------------------------------------------------------------------

st.title("📰 News Sentiment Analyzer")
# Pipeline overview; the arrow between "Scrape" and "Summarize" was missing.
st.caption("Scrape → Summarize → Sentiment → Keywords → Multilingual → Audio — deployed on Hugging Face Spaces")
88
+
89
+ # ------------------------------------------------------------------------------
90
+ # Helper functions
91
+ # ------------------------------------------------------------------------------
92
+
93
def _articles_to_df(articles: List[Dict[str, Any]]) -> pd.DataFrame:
    """Flatten analyzed article dicts into a table for display and CSV export.

    Args:
        articles: Article dicts as returned by the analyzer. Missing keys fall
            back to empty strings (``None`` for ``date``), and a missing or
            non-dict ``sentiment`` entry yields a compound score of ``0.0``.

    Returns:
        A DataFrame with the columns ``title``, ``source``, ``date``, ``url``,
        ``summary`` and ``sentiment_compound``. The columns are present even
        when ``articles`` is empty. ``date`` is converted to datetimes when
        every value parses; otherwise the raw values are kept untouched.
    """
    columns = ["title", "source", "date", "url", "summary", "sentiment_compound"]

    rows = []
    for a in articles or []:
        # "sentiment" may be present but None (or otherwise not a mapping).
        sentiment = a.get("sentiment")
        if not isinstance(sentiment, dict):
            sentiment = {}
        rows.append(
            {
                "title": a.get("title", ""),
                "source": a.get("source", ""),
                "date": a.get("date"),
                "url": a.get("url", ""),
                "summary": a.get("summary", ""),
                "sentiment_compound": sentiment.get("compound", 0.0),
            }
        )

    # Passing `columns` keeps the schema stable for an empty article list.
    df = pd.DataFrame(rows, columns=columns)

    try:
        df["date"] = pd.to_datetime(df["date"])
    except (ValueError, TypeError, OverflowError):
        # Best effort: keep the original values, but leave a trace instead of
        # hiding the failure completely.
        logging.getLogger("app").warning(
            "Could not parse article dates; leaving the 'date' column unparsed."
        )
    return df
113
+
114
+
115
def _render_distribution(dist: Dict[str, Any]):
    """Render the sentiment distribution as four metrics plus a bar chart.

    Args:
        dist: Mapping read for the keys ``total``, ``positive``, ``negative``
            and ``neutral``. Missing class counts default to ``0``. When
            ``total`` is absent it is derived from the three class counts so
            the headline number cannot disagree with the bars.
    """
    positive = dist.get("positive", 0)
    negative = dist.get("negative", 0)
    neutral = dist.get("neutral", 0)
    # Only compute the fallback when needed, so a supplied total is used as-is.
    total = dist["total"] if "total" in dist else positive + negative + neutral

    cols = st.columns(4)
    cols[0].metric("Total", total)
    cols[1].metric("Positive", positive)
    cols[2].metric("Negative", negative)
    cols[3].metric("Neutral", neutral)

    chart_df = pd.DataFrame(
        {
            "Sentiment": ["Positive", "Negative", "Neutral"],
            "Count": [positive, negative, neutral],
        }
    )
    fig = px.bar(chart_df, x="Sentiment", y="Count", title="Sentiment distribution")
    st.plotly_chart(fig, use_container_width=True)
134
+
135
+
136
def _safe_filename_part(text: Any, fallback: str = "query") -> str:
    """Return ``text`` reduced to letters, digits, ``-`` and ``_`` for use in a file name."""
    cleaned = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in str(text)
    ).strip("_")
    return cleaned or fallback


def _pdf_to_bytes(pdf_obj: Any) -> bytes:
    """Normalize the PDF generator's result (bytes, file-like object, or path) to raw data."""
    if isinstance(pdf_obj, (bytes, bytearray)):
        return bytes(pdf_obj)
    # In-memory buffers such as io.BytesIO: getvalue() ignores the cursor position.
    if hasattr(pdf_obj, "getvalue"):
        return pdf_obj.getvalue()
    # Any other file-like object.
    if hasattr(pdf_obj, "read"):
        return pdf_obj.read()
    # Otherwise assume it's a file path.
    with open(pdf_obj, "rb") as f:
        return f.read()


def _download_buttons(results: Dict[str, Any], df: pd.DataFrame):
    """Render JSON, CSV and PDF download buttons side by side.

    Args:
        results: Full analysis result. It is serialized to JSON (values that
            are not JSON-native are stringified via ``default=str``) and passed
            to ``generate_pdf_report``. Its ``query`` entry is used in the
            download file names.
        df: Article table exported as CSV.

    A failure while building the PDF is logged and reported to the user as an
    informational message; the JSON and CSV buttons are unaffected.
    """
    # One timestamp and one sanitized query so all three files share a base name.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = f"news_analysis_{_safe_filename_part(results.get('query', ''))}_{stamp}"

    c1, c2, c3 = st.columns(3)

    # JSON
    with c1:
        json_bytes = json.dumps(results, default=str, indent=2).encode("utf-8")
        st.download_button(
            "Download JSON",
            data=json_bytes,
            file_name=f"{base_name}.json",
            mime="application/json",
            use_container_width=True,
        )

    # CSV
    with c2:
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        st.download_button(
            "Download CSV",
            data=csv_bytes,
            file_name=f"{base_name}.csv",
            mime="text/csv",
            use_container_width=True,
        )

    # PDF
    with c3:
        try:
            # May return bytes, a file-like buffer, or a file path.
            pdf_bytes = _pdf_to_bytes(generate_pdf_report(results))
            st.download_button(
                "Download PDF",
                data=pdf_bytes,
                file_name=f"{base_name}.pdf",
                mime="application/pdf",
                use_container_width=True,
            )
        except Exception:
            # Top-level UI boundary: keep the page alive and record the traceback.
            st.info("PDF generator not available or failed. You can still export JSON/CSV.")
            logger.exception("PDF generation failed")
182
+
183
+
184
def _render_audio(audio_files: Dict[str, Any]):
    """Render one audio player per language.

    Args:
        audio_files: Mapping of language label to an audio location. Entries
            with a falsy location are skipped. Nothing is rendered when the
            mapping itself is empty.

    Each location is first read from disk and played from bytes. If it cannot
    be opened as a local file, the location is handed to ``st.audio`` directly.
    A failure of that fallback is logged and shown as a warning so the rest of
    the page still renders.
    """
    if not audio_files:
        return

    st.subheader("Audio summaries")
    for lang, path in audio_files.items():
        if not path:
            continue

        st.markdown(f"**{lang}**")
        try:
            with open(path, "rb") as f:
                audio_bytes = f.read()
        except (OSError, TypeError):
            # Some Spaces require passing the path directly
            try:
                st.audio(path)
            except Exception:
                logger.exception("Could not load audio for %s", lang)
                st.warning(f"Audio for {lang} is unavailable.")
        else:
            st.audio(audio_bytes, format="audio/mp3")
197
+
198
+
199
+ # ------------------------------------------------------------------------------
200
+ # Main flow
201
+ # ------------------------------------------------------------------------------
202
+
203
if run_btn:
    query = (default_query or "").strip()
    if not query:
        st.warning("Please enter a company or keyword before running the analysis.")
        st.stop()

    st.info("Starting analysis… this may take ~30–60 seconds on a CPU Space (FinBERT/summarizer/translation are heavy).")

    progress = st.progress(0, text="Initializing…")

    def _cb(p: int, status: str):
        """Progress callback handed to the analyzer; must never abort the analysis."""
        try:
            # Clamp and cast so out-of-range or float values still update the bar.
            progress.progress(int(max(0, min(100, p))), text=status)
        except Exception:
            # Deliberately best-effort, but keep a trace for debugging.
            logger.debug("Progress update failed", exc_info=True)

    config = {
        "query": query,
        "num_articles": num_articles,
        "languages": languages or ["English"],
        "include_audio": include_audio,
        "sentiment_models": sentiment_models or ["VADER", "Loughran-McDonald", "FinBERT"],
    }

    try:
        results: Dict[str, Any] = analyzer.analyze_news(config, progress_callback=_cb)
    except Exception as e:
        progress.empty()
        logger.exception("Analysis failed for query %r", query)
        st.error(f"Analysis failed: {e}")
        st.stop()

    progress.empty()

    # Persist across reruns: widget interactions (e.g. a download button click)
    # re-execute the script with run_btn == False, which would otherwise drop
    # the results from the page.
    st.session_state["analysis_results"] = results or {}

results = st.session_state.get("analysis_results")

if results is None:
    st.info("Enter a company/keyword on the left and click Analyze. Example: Tesla, Nvidia, Reliance, HDFC, Adani, BYD.")

elif not results.get("articles"):
    # Handle empty gracefully
    st.warning("No articles found or scraping failed. Try a different query or reduce filters.")

else:
    # Header summary
    st.subheader(f"Results {results.get('query', default_query)}")
    dist = (results.get("summary") or {}).get("distribution")
    if dist:
        _render_distribution(dist)
    else:
        logger.warning("Analysis result has no sentiment distribution; skipping chart.")

    # Keywords
    if results.get("keywords"):
        top_kw = ", ".join(kw["keyword"] for kw in results["keywords"][:12])
        st.markdown(f"**Top keywords:** {top_kw}")

    # Articles table
    df = _articles_to_df(results["articles"])
    st.dataframe(df, use_container_width=True, hide_index=True)

    # Audio (optional)
    if results.get("audio_files"):
        _render_audio(results["audio_files"])

    # Exports
    st.divider()
    _download_buttons(results, df)

# Footer
st.markdown(
    "<p class='small'>Built with Streamlit + FastAPI · CPU-only · "
    "FinBERT/VADER/LM sentiment · BART/T5 summarization · YAKE keywords · gTTS audio.</p>",
    unsafe_allow_html=True,
)