|
|
|
""" |
|
Streamlit UI for the News Sentiment Analyzer. |
|
- Calls the in-process FastAPI orchestrator (NewsAnalyzer) directly for zero-latency on Spaces. |
|
- Lightweight, CPU-safe widgets with progress, charts, tables, and exports (CSV/JSON/PDF + Audio). |
|
""" |
|
|
|
from __future__ import annotations |
|
|
|
import io |
|
import json |
|
import logging |
|
from datetime import datetime |
|
from typing import Any, Dict, List |
|
|
|
import streamlit as st |
|
import pandas as pd |
|
import plotly.express as px |
|
|
|
|
|
from api import analyzer |
|
from utils import ( |
|
setup_logging, |
|
load_config, |
|
calculate_sentiment_distribution, |
|
format_number, |
|
) |
|
from report import generate_pdf_report |
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# App bootstrap: logging, page configuration, global CSS, sidebar controls.
# ---------------------------------------------------------------------------

setup_logging()
logger = logging.getLogger("app")

# NOTE: st.set_page_config must be the first Streamlit command of the script.
st.set_page_config(
    page_title="News Sentiment Analyzer",
    page_icon="📰",
    layout="wide",
)

# Small global CSS tweaks: muted helper text, status colors, progress-bar
# tint, and tighter top padding.
st.markdown(
    """
<style>
.small { font-size: 0.85rem; color: #666; }
.ok { color: #1b8a5a; }
.bad { color: #b00020; }
.neutral { color: #666; }
.stProgress > div > div > div { background-color: #4b8bf4; }
.block-container { padding-top: 2rem; }
</style>
""",
    unsafe_allow_html=True,
)

# NOTE(review): cfg is not referenced anywhere later in this file — confirm
# load_config() is needed here (it may be kept for its side effects).
cfg = load_config()

# Sidebar: all user-tunable inputs for one analysis run.
st.sidebar.header("Settings")
default_query = st.sidebar.text_input("Company / Keyword", value="Tesla")
num_articles = st.sidebar.slider("Number of articles", 5, 50, 20, step=1)
languages = st.sidebar.multiselect(
    "Summaries in languages",
    options=["English", "Hindi", "Tamil"],
    default=["English"],
)
include_audio = st.sidebar.checkbox("Generate audio summary", value=True)
sentiment_models = st.sidebar.multiselect(
    "Sentiment models",
    options=["VADER", "Loughran-McDonald", "FinBERT"],
    default=["VADER", "Loughran-McDonald", "FinBERT"],
)
st.sidebar.caption("Tip: disable FinBERT if your Space has < 2GB RAM.")

# Pressing this triggers the main flow at the bottom of the file.
run_btn = st.sidebar.button("Analyze", use_container_width=True, type="primary")

# Page header.
st.title("📰 News Sentiment Analyzer")
st.caption("Scrape → Summarize → Sentiment → Keywords → Multilingual → Audio — deployed on Hugging Face Spaces")
|
|
|
|
|
|
|
|
|
|
|
def _articles_to_df(articles: List[Dict[str, Any]]) -> pd.DataFrame: |
|
rows = [] |
|
for a in articles: |
|
rows.append( |
|
{ |
|
"title": a.get("title", ""), |
|
"source": a.get("source", ""), |
|
"date": a.get("date"), |
|
"url": a.get("url", ""), |
|
"summary": a.get("summary", ""), |
|
"sentiment_compound": a.get("sentiment", {}).get("compound", 0.0), |
|
} |
|
) |
|
df = pd.DataFrame(rows) |
|
if "date" in df.columns: |
|
try: |
|
df["date"] = pd.to_datetime(df["date"]) |
|
except Exception: |
|
pass |
|
return df |
|
|
|
|
|
def _render_distribution(dist: Dict[str, Any]):
    """Show headline metrics and a bar chart for the sentiment split.

    ``dist`` is expected to carry integer counts under the keys
    "total", "positive", "negative" and "neutral"; missing keys render as 0.
    """
    labels = ["Positive", "Negative", "Neutral"]
    counts = [dist.get(label.lower(), 0) for label in labels]

    total_col, pos_col, neg_col, neu_col = st.columns(4)
    total_col.metric("Total", dist.get("total", 0))
    pos_col.metric("Positive", counts[0])
    neg_col.metric("Negative", counts[1])
    neu_col.metric("Neutral", counts[2])

    chart_df = pd.DataFrame({"Sentiment": labels, "Count": counts})
    fig = px.bar(chart_df, x="Sentiment", y="Count", title="Sentiment distribution")
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
def _download_buttons(results: Dict[str, Any], df: pd.DataFrame):
    """Render JSON, CSV and PDF download buttons for one analysis run.

    Args:
        results: Full analyzer result dict; must contain a "query" key.
        df: Flattened article table (see _articles_to_df) used for the CSV.

    The PDF button degrades gracefully: if report generation fails, an info
    message is shown and only the JSON/CSV exports remain available.
    """
    # One shared, filesystem-safe filename stem so all three exports of a
    # run get matching names even if the clock ticks between renders (the
    # previous version recomputed the timestamp per button), and so queries
    # containing spaces/slashes cannot produce broken filenames.
    safe_query = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in str(results["query"])
    ) or "query"
    stem = f"news_analysis_{safe_query}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    c1, c2, c3 = st.columns(3)

    with c1:
        # default=str makes datetimes and other non-JSON types serializable.
        json_bytes = json.dumps(results, default=str, indent=2).encode("utf-8")
        st.download_button(
            "Download JSON",
            data=json_bytes,
            file_name=f"{stem}.json",
            mime="application/json",
            use_container_width=True,
        )

    with c2:
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        st.download_button(
            "Download CSV",
            data=csv_bytes,
            file_name=f"{stem}.csv",
            mime="text/csv",
            use_container_width=True,
        )

    with c3:
        try:
            pdf_bytes = _pdf_payload(generate_pdf_report(results))
            st.download_button(
                "Download PDF",
                data=pdf_bytes,
                file_name=f"{stem}.pdf",
                mime="application/pdf",
                use_container_width=True,
            )
        except Exception as e:
            # Best effort: PDF support is optional on constrained Spaces.
            st.info("PDF generator not available or failed. You can still export JSON/CSV.")
            logger.exception(f"PDF generation failed: {e}")


def _pdf_payload(pdf_obj: Any) -> bytes:
    """Normalize generate_pdf_report output (bytes, file-like, or path) to bytes.

    Raises:
        ValueError: if no non-empty PDF payload could be extracted.
    """
    if isinstance(pdf_obj, (bytes, bytearray)):
        data = bytes(pdf_obj)
    elif hasattr(pdf_obj, "read"):
        try:
            data = pdf_obj.read()
            # A stream already positioned at EOF reads b""; BytesIO-style
            # objects still expose the full buffer via getvalue().
            if not data and hasattr(pdf_obj, "getvalue"):
                data = pdf_obj.getvalue()
        finally:
            try:
                pdf_obj.close()
            except Exception:
                pass
    else:
        # Anything else is treated as a filesystem path.
        with open(pdf_obj, "rb") as f:
            data = f.read()
    if not data:
        raise ValueError("Empty PDF bytes")
    return data
|
|
|
|
|
|
|
def _render_audio(audio_files: Dict[str, Any]):
    """Embed one audio player per language for generated summary files.

    Values in ``audio_files`` are expected to be local file paths (TODO
    confirm against the analyzer); falsy entries are skipped. If reading a
    file fails, the raw value is handed to st.audio as a fallback.
    """
    if not audio_files:
        return
    st.subheader("Audio summaries")
    for language, location in audio_files.items():
        if not location:
            continue
        st.markdown(f"**{language}**")
        try:
            with open(location, "rb") as stream:
                st.audio(stream.read(), format="audio/mp3")
        except Exception:
            # Let Streamlit resolve the value itself (e.g. a URL).
            st.audio(location)
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Main flow: runs the full pipeline when the sidebar "Analyze" button is hit.
# ---------------------------------------------------------------------------
if run_btn:
    st.info("Starting analysis… this may take ~30–60 seconds on a CPU Space (FinBERT/summarizer/translation are heavy).")

    progress = st.progress(0, text="Initializing…")

    def _cb(p: int, status: str):
        # Progress callback handed to the analyzer; swallow widget errors so
        # a UI hiccup can never abort the analysis itself.
        try:
            progress.progress(p, text=status)
        except Exception:
            pass

    # Assemble the run configuration from the sidebar widgets; fall back to
    # sensible defaults if the user cleared either multiselect.
    config = {
        "query": default_query,
        "num_articles": num_articles,
        "languages": languages or ["English"],
        "include_audio": include_audio,
        "sentiment_models": sentiment_models or ["VADER", "Loughran-McDonald", "FinBERT"],
    }

    try:
        results: Dict[str, Any] = analyzer.analyze_news(config, progress_callback=_cb)
    except Exception as e:
        progress.empty()
        st.error(f"Analysis failed: {e}")
        st.stop()  # halts this Streamlit script run

    progress.empty()

    # No articles means scraping/filters produced nothing useful — bail out.
    if not results.get("articles"):
        st.warning("No articles found or scraping failed. Try a different query or reduce filters.")
        st.stop()

    st.subheader(f"Results — {results['query']}")
    dist = results["summary"]["distribution"]
    _render_distribution(dist)

    if results.get("keywords"):
        # Show at most the first 12 keywords as one comma-separated line.
        top_kw = ", ".join(kw["keyword"] for kw in results["keywords"][:12])
        st.markdown(f"**Top keywords:** {top_kw}")

    # Tabular view of the articles, plus the export buttons below.
    df = _articles_to_df(results["articles"])
    st.dataframe(df, use_container_width=True, hide_index=True)

    if results.get("audio_files"):
        _render_audio(results["audio_files"])

    st.divider()
    _download_buttons(results, df)

else:
    # Idle state: shown until the user clicks Analyze.
    st.info("Enter a company/keyword on the left and click Analyze. Example: Tesla, Nvidia, Reliance, HDFC, Adani, BYD.")

# Footer rendered on every run (idle or results).
st.markdown(
    "<p class='small'>Built with Streamlit + FastAPI · CPU-only · "
    "FinBERT/VADER/LM sentiment · BART/T5 summarization · YAKE keywords · gTTS audio.</p>",
    unsafe_allow_html=True,
)
|
|