# app.py
"""
Streamlit UI for the News Sentiment Analyzer.
- Calls the in-process FastAPI orchestrator (NewsAnalyzer) directly for zero-latency on Spaces.
- Lightweight, CPU-safe widgets with progress, charts, tables, and exports (CSV/JSON/PDF + Audio).
"""
from __future__ import annotations

import io
import json
import logging
from datetime import datetime
from typing import Any, Dict, List

import streamlit as st
import pandas as pd
import plotly.express as px

# Local modules
from api import analyzer  # global NewsAnalyzer instance
from utils import (
    setup_logging,
    load_config,
    calculate_sentiment_distribution,
    format_number,
)
from report import generate_pdf_report  # your existing PDF generator
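
# Expected shape of the dict returned by analyzer.analyze_news, inferred from how the UI
# below consumes it (treat this as an assumption, not the orchestrator's documented contract):
#   {
#     "query": str,
#     "articles": [{"title", "source", "date", "url", "summary", "sentiment": {"compound": float}}, ...],
#     "summary": {"distribution": {"total", "positive", "negative", "neutral"}},
#     "keywords": [{"keyword": str, ...}, ...],
#     "audio_files": {language: file_path, ...},
#   }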
# ------------------------------------------------------------------------------
# App setup
# ------------------------------------------------------------------------------
setup_logging()
logger = logging.getLogger("app")

st.set_page_config(
    page_title="News Sentiment Analyzer",
    page_icon="📰",
    layout="wide",
)

# Minimal CSS polish
st.markdown(
    """
    <style>
    .small { font-size: 0.85rem; color: #666; }
    .ok { color: #1b8a5a; }
    .bad { color: #b00020; }
    .neutral { color: #666; }
    .stProgress > div > div > div { background-color: #4b8bf4; }
    .block-container { padding-top: 2rem; }
    </style>
    """,
    unsafe_allow_html=True,
)
# ------------------------------------------------------------------------------
# Sidebar controls
# ------------------------------------------------------------------------------
cfg = load_config()

st.sidebar.header("Settings")
default_query = st.sidebar.text_input("Company / Keyword", value="Tesla")
num_articles = st.sidebar.slider("Number of articles", 5, 50, 20, step=1)
languages = st.sidebar.multiselect(
    "Summary languages",
    options=["English", "Hindi", "Tamil"],
    default=["English"],
)
include_audio = st.sidebar.checkbox("Generate audio summary", value=True)
sentiment_models = st.sidebar.multiselect(
    "Sentiment models",
    options=["VADER", "Loughran-McDonald", "FinBERT"],
    default=["VADER", "Loughran-McDonald", "FinBERT"],
)
st.sidebar.caption("Tip: disable FinBERT if your Space has < 2 GB RAM.")
run_btn = st.sidebar.button("Analyze", use_container_width=True, type="primary")
# ------------------------------------------------------------------------------
# Header
# ------------------------------------------------------------------------------
st.title("📰 News Sentiment Analyzer")
st.caption("Scrape → Summarize → Sentiment → Keywords → Multilingual → Audio — deployed on Hugging Face Spaces")

# ------------------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------------------
def _articles_to_df(articles: List[Dict[str, Any]]) -> pd.DataFrame:
    """Flatten the analyzer's article dicts into a DataFrame for display and export."""
    rows = []
    for a in articles:
        rows.append(
            {
                "title": a.get("title", ""),
                "source": a.get("source", ""),
                "date": a.get("date"),
                "url": a.get("url", ""),
                "summary": a.get("summary", ""),
                # Tolerate a missing or None sentiment dict
                "sentiment_compound": (a.get("sentiment") or {}).get("compound", 0.0),
            }
        )
    df = pd.DataFrame(rows)
    if "date" in df.columns:
        try:
            df["date"] = pd.to_datetime(df["date"])
        except Exception:
            pass
    return df
def _render_distribution(dist: Dict[str, Any]):
    """Show headline metrics and a bar chart for the sentiment distribution."""
    cols = st.columns(4)
    cols[0].metric("Total", dist.get("total", 0))
    cols[1].metric("Positive", dist.get("positive", 0))
    cols[2].metric("Negative", dist.get("negative", 0))
    cols[3].metric("Neutral", dist.get("neutral", 0))

    chart_df = pd.DataFrame(
        {
            "Sentiment": ["Positive", "Negative", "Neutral"],
            "Count": [
                dist.get("positive", 0),
                dist.get("negative", 0),
                dist.get("neutral", 0),
            ],
        }
    )
    fig = px.bar(chart_df, x="Sentiment", y="Count", title="Sentiment distribution")
    st.plotly_chart(fig, use_container_width=True)
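
# For reference, a distribution like the one rendered above could also be derived client-side
# from the per-article compound scores. This is a minimal, unused sketch: the deployed app
# relies on the analyzer's own summary, and the ±0.05 cutoffs (the usual VADER convention)
# are an assumption here, not something this repo specifies.
def _distribution_from_compounds(compounds: List[float]) -> Dict[str, int]:
    pos = sum(c > 0.05 for c in compounds)
    neg = sum(c < -0.05 for c in compounds)
    return {
        "total": len(compounds),
        "positive": pos,
        "negative": neg,
        "neutral": len(compounds) - pos - neg,
    }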
def _download_buttons(results: Dict[str, Any], df: pd.DataFrame):
    """Offer JSON, CSV, and (when available) PDF exports of the analysis results."""
    # One timestamped stem so all three exports share a consistent file name.
    stem = f"news_analysis_{results['query']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    c1, c2, c3 = st.columns(3)

    # JSON
    with c1:
        json_bytes = json.dumps(results, default=str, indent=2).encode("utf-8")
        st.download_button(
            "Download JSON",
            data=json_bytes,
            file_name=f"{stem}.json",
            mime="application/json",
            use_container_width=True,
        )

    # CSV
    with c2:
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        st.download_button(
            "Download CSV",
            data=csv_bytes,
            file_name=f"{stem}.csv",
            mime="text/csv",
            use_container_width=True,
        )

    # PDF
    with c3:
        try:
            pdf_obj = generate_pdf_report(results)
            # Accept bytes, BytesIO/file-like, or path
            pdf_bytes = None
            if isinstance(pdf_obj, (bytes, bytearray)):
                pdf_bytes = pdf_obj
            elif hasattr(pdf_obj, "read"):  # file-like object
                try:
                    pdf_bytes = pdf_obj.read()
                    if not pdf_bytes and hasattr(pdf_obj, "getvalue"):
                        pdf_bytes = pdf_obj.getvalue()
                finally:
                    try:
                        pdf_obj.close()
                    except Exception:
                        pass
            else:
                # Assume file path
                with open(pdf_obj, "rb") as f:
                    pdf_bytes = f.read()
            if not pdf_bytes:
                raise ValueError("Empty PDF bytes")
            st.download_button(
                "Download PDF",
                data=pdf_bytes,
                file_name=f"{stem}.pdf",
                mime="application/pdf",
                use_container_width=True,
            )
        except Exception as e:
            st.info("PDF generator not available or failed. You can still export JSON/CSV.")
            logger.exception("PDF generation failed: %s", e)
def _render_audio(audio_files: Dict[str, Any]):
    """Render an audio player per language from a {language: file path} mapping."""
    if not audio_files:
        return
    st.subheader("Audio summaries")
    for lang, path in audio_files.items():
        if path:
            st.markdown(f"**{lang}**")
            try:
                with open(path, "rb") as f:
                    st.audio(f.read(), format="audio/mpeg")
            except Exception:
                # Some Spaces require passing the path directly
                st.audio(path)
# ------------------------------------------------------------------------------
# Main flow
# ------------------------------------------------------------------------------
if run_btn:
    st.info("Starting analysis… this may take ~30–60 seconds on a CPU Space (FinBERT/summarizer/translation are heavy).")
    progress = st.progress(0, text="Initializing…")

    def _cb(p: int, status: str):
        try:
            progress.progress(p, text=status)
        except Exception:
            pass

    config = {
        "query": default_query,
        "num_articles": num_articles,
        "languages": languages or ["English"],
        "include_audio": include_audio,
        "sentiment_models": sentiment_models or ["VADER", "Loughran-McDonald", "FinBERT"],
    }
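
    # Assumed contract (inferred from the call below): analyzer.analyze_news(config, progress_callback=...)
    # returns the results dict and reports progress through the callback as an int percentage
    # (0-100) plus a status string, which is what st.progress above accepts.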
    try:
        results: Dict[str, Any] = analyzer.analyze_news(config, progress_callback=_cb)
    except Exception as e:
        progress.empty()
        st.error(f"Analysis failed: {e}")
        st.stop()

    progress.empty()

    # Handle empty gracefully
    if not results.get("articles"):
        st.warning("No articles found or scraping failed. Try a different query or reduce filters.")
        st.stop()

    # Header summary
    st.subheader(f"Results — {results['query']}")
    dist = results["summary"]["distribution"]
    _render_distribution(dist)

    # Keywords
    if results.get("keywords"):
        top_kw = ", ".join(kw["keyword"] for kw in results["keywords"][:12])
        st.markdown(f"**Top keywords:** {top_kw}")

    # Articles table
    df = _articles_to_df(results["articles"])
    st.dataframe(df, use_container_width=True, hide_index=True)

    # Audio (optional)
    if results.get("audio_files"):
        _render_audio(results["audio_files"])

    # Exports
    st.divider()
    _download_buttons(results, df)
else:
    st.info("Enter a company/keyword on the left and click Analyze. Example: Tesla, Nvidia, Reliance, HDFC, Adani, BYD.")
# Footer
st.markdown(
    "<p class='small'>Built with Streamlit + FastAPI · CPU-only · "
    "FinBERT/VADER/LM sentiment · BART/T5 summarization · YAKE keywords · gTTS audio.</p>",
    unsafe_allow_html=True,
)
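
# To run locally (outside Spaces), with the sibling api/utils/report modules and their
# model dependencies installed:
#   streamlit run app.py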