Update api.py
api.py
CHANGED
@@ -1,355 +1,371 @@
# api.py
"""
FastAPI backend for the News Sentiment Analyzer.
- Orchestrates scraping, NLP, summarization, translation, and TTS.
- Safe for Hugging Face Spaces (CPU-only, lazy model loading, CORS open).
"""

from __future__ import annotations

import os
import json
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

# Local modules
from utils import (
    setup_logging,
    load_config,
    calculate_processing_stats,
    calculate_sentiment_distribution,
)
from scraper import NewsletterScraper
from summarizer import TextSummarizer, extract_key_sentences
from translator import MultilingualTranslator
from tts import AudioGenerator
from nlp import SentimentAnalyzer, KeywordExtractor, TextProcessor  # provided in your repo

# ------------------------------------------------------------------------------
# Init
# ------------------------------------------------------------------------------

setup_logging()
logger = logging.getLogger("api")

app = FastAPI(
    title="News Intelligence API",
    version="1.0.0",
    description="Backend for News Sentiment Analyzer (Hugging Face deploy-ready)",
)

# Hugging Face Spaces often runs UI + API from same origin,
# but open CORS to keep it simple for local/dev and Space builds.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ------------------------------------------------------------------------------
# Pydantic models
# ------------------------------------------------------------------------------

class AnalyzeRequest(BaseModel):
    query: str = Field(..., description="Company / stock / keyword to analyze")
    num_articles: int = Field(20, ge=5, le=50, description="Number of articles (5-50)")
    languages: List[str] = Field(default_factory=lambda: ["English"])
    include_audio: bool = True
    sentiment_models: List[str] = Field(
        default_factory=lambda: ["VADER", "Loughran-McDonald", "FinBERT"]
    )


class AnalyzeResponse(BaseModel):
    query: str
    summary: Dict[str, Any]
    articles: List[Dict[str, Any]]
    keywords: List[Dict[str, Any]]
    audio_files: Optional[Dict[str, Optional[str]]] = None
    languages: List[str]
    config: Dict[str, Any]
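
# A minimal request body for POST /api/analyze, matching AnalyzeRequest above
# (illustrative values only; any company name, ticker, or keyword works):
#
#   {
#     "query": "NVIDIA",
#     "num_articles": 10,
#     "languages": ["English", "Hindi"],
#     "include_audio": false,
#     "sentiment_models": ["VADER", "FinBERT"]
#   }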

# ------------------------------------------------------------------------------
# Core Orchestrator
# ------------------------------------------------------------------------------

class NewsAnalyzer:
    """
    All heavy components are created lazily to avoid high cold-start memory usage
    and to play nice with Hugging Face CPU-only Spaces.
    """

    def __init__(self) -> None:
        self._cfg = load_config()

        self._scraper: Optional[NewsletterScraper] = None
        self._summarizer: Optional[TextSummarizer] = None
        self._translator: Optional[MultilingualTranslator] = None
        self._audio: Optional[AudioGenerator] = None
        self._sentiment: Optional[SentimentAnalyzer] = None
        self._keywords: Optional[KeywordExtractor] = None
        self._textproc: Optional[TextProcessor] = None

        logger.info("NewsAnalyzer initialized with lazy components.")

    # --- Lazy props -----------------------------------------------------------

    @property
    def scraper(self) -> NewsletterScraper:
        if self._scraper is None:
            self._scraper = NewsletterScraper()
        return self._scraper

    @property
    def summarizer(self) -> TextSummarizer:
        if self._summarizer is None:
            self._summarizer = TextSummarizer()
        return self._summarizer

    @property
    def translator(self) -> MultilingualTranslator:
        if self._translator is None:
            self._translator = MultilingualTranslator()
        return self._translator

    @property
    def audio(self) -> AudioGenerator:
        if self._audio is None:
            self._audio = AudioGenerator()
        return self._audio

    @property
    def sentiment(self) -> SentimentAnalyzer:
        if self._sentiment is None:
            self._sentiment = SentimentAnalyzer()
        return self._sentiment

    @property
    def keyword_extractor(self) -> KeywordExtractor:
        if self._keywords is None:
            self._keywords = KeywordExtractor()
        return self._keywords

    @property
    def textproc(self) -> TextProcessor:
        if self._textproc is None:
            self._textproc = TextProcessor()
        return self._textproc

    # --- Pipeline -------------------------------------------------------------

    def analyze_news(
        self,
        config: Dict[str, Any],
        progress_callback=None,
    ) -> Dict[str, Any]:
        """
        Synchronous pipeline used by the Streamlit UI.
        (The FastAPI endpoints wrap it synchronously as well.)
        """
        start_time = datetime.now()
        query: str = config["query"].strip()
        num_articles: int = int(config.get("num_articles", 20))
        languages: List[str] = config.get("languages", ["English"]) or ["English"]
        include_audio: bool = bool(config.get("include_audio", True))
        sentiment_models: List[str] = config.get(
            "sentiment_models", ["VADER", "Loughran-McDonald", "FinBERT"]
        )

        if progress_callback:
            progress_callback(5, "Initializing pipeline...")

        # --- Step 1: Scrape ---------------------------------------------------
        if progress_callback:
            progress_callback(10, "Scraping articles...")
        articles = self.scraper.scrape_news(query, max_articles=num_articles)

        if not articles:
            # Return a graceful empty response rather than raising
            return {
                "query": query,
                "summary": {
                    "average_sentiment": 0.0,
                    "distribution": {"positive": 0, "negative": 0, "neutral": 0, "total": 0},
                    "processing": calculate_processing_stats(start_time, 0),
                },
                "articles": [],
                "keywords": [],
                "audio_files": {},
                "languages": languages,
                "config": config,
            }

        # Ensure 'content' is present
        for a in articles:
            if not a.get("content"):
                a["content"] = a.get("summary") or a.get("title") or ""

        # --- Step 2: Sentiment ------------------------------------------------
        if progress_callback:
            progress_callback(30, "Analyzing sentiment...")
        for a in articles:
            try:
                a["sentiment"] = self.sentiment.analyze_sentiment(
                    a["content"], models=sentiment_models
                )
            except Exception as e:
                logger.exception(f"Sentiment failed for '{a.get('title', '')[:60]}': {e}")
                a["sentiment"] = {"compound": 0.0}

        # --- Step 3: Summaries ------------------------------------------------
        if progress_callback:
            progress_callback(50, "Generating summaries...")
        for a in articles:
            try:
                a["summary"] = self.summarizer.summarize(a["content"])
            except Exception as e:
                logger.exception(f"Summarization failed: {e}")
                a["summary"] = self.textproc.clean_text(a["content"])[:300] + "..."

        # --- Step 4: Multilingual summaries ------------------------------------
        if len(languages) > 1:
            if progress_callback:
                progress_callback(60, "Translating summaries...")
            for a in articles:
                a["summaries"] = {}
                for lang in languages:
                    try:
                        if lang == "English":
                            a["summaries"][lang] = a["summary"]
                        else:
                            a["summaries"][lang] = self.translator.translate(
                                a["summary"], target_lang=lang, source_lang="English"
                            )
                    except Exception as e:
                        logger.exception(f"Translation failed ({lang}): {e}")
                        a["summaries"][lang] = a["summary"]

        # --- Step 5: Keywords (YAKE) -------------------------------------------
        if progress_callback:
            progress_callback(70, "Extracting keywords...")
        joined = " ".join(a.get("content", "") for a in articles)
        keywords = self.keyword_extractor.extract_keywords(joined) if joined else []

        # --- Step 6: Optional audio --------------------------------------------
        audio_files: Dict[str, Optional[str]] = {}
        if include_audio and languages:
            if progress_callback:
                progress_callback(80, "Creating audio summaries...")
            overall_summary = self._overall_summary_text(articles, keywords)
            for lang in languages:
                try:
                    summary_text = (
                        self.translator.translate(overall_summary, target_lang=lang)
                        if lang != "English"
                        else overall_summary
                    )
                    audio_files[lang] = self.audio.generate_audio(
                        summary_text,
                        language=lang,
                        output_file=f"summary_{lang.lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3",
                    )
                except Exception as e:
                    logger.exception(f"Audio failed ({lang}): {e}")
                    audio_files[lang] = None

        # --- Summary stats ------------------------------------------------------
        if progress_callback:
            progress_callback(90, "Finalizing results...")
        dist = calculate_sentiment_distribution(articles)
        processing = calculate_processing_stats(start_time, len(articles))

        results: Dict[str, Any] = {
            "query": query,
            "summary": {
                "average_sentiment": dist.get("average_sentiment", 0.0),
                "distribution": dist,
                "processing": processing,
                "top_sentences": extract_key_sentences(joined, num_sentences=3),
            },
            "articles": articles,
            "keywords": keywords,
            "audio_files": audio_files,
            "languages": languages,
            "config": config,
        }

        if progress_callback:
            progress_callback(100, "Done.")
        return results

    # --- Helpers ---------------------------------------------------------------

    def _overall_summary_text(
        self, articles: List[Dict[str, Any]], keywords: List[Dict[str, Any]]
    ) -> str:
        """Create a concise, human-friendly overall summary to read out in audio."""
        pos = sum(1 for a in articles if a.get("sentiment", {}).get("compound", 0) > 0.1)
        neg = sum(1 for a in articles if a.get("sentiment", {}).get("compound", 0) < -0.1)
        neu = len(articles) - pos - neg

        top_kw = ", ".join(kw["keyword"] for kw in keywords[:8]) if keywords else ""

        latest_title = ""
        try:
            latest = sorted(
                [a for a in articles if a.get("date")],
                key=lambda x: x.get("date"),
                reverse=True,
            )
            if latest:
                latest_title = latest[0].get("title", "")[:120]
        except Exception:
            pass

        parts = [
            f"News analysis summary for {len(articles)} articles.",
            f"Overall sentiment: {pos} positive, {neg} negative, and {neu} neutral articles.",
        ]
        if latest_title:
            parts.append(f"Latest development: {latest_title}.")
        if top_kw:
            parts.append(f"Top themes include: {top_kw}.")
        parts.append("This concludes the summary.")
        return " ".join(parts)


# Single global analyzer (works fine for Spaces + Streamlit)
analyzer = NewsAnalyzer()
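
# A minimal sketch of driving the pipeline directly (e.g. from a Streamlit app).
# The callback signature (percent, message) matches the calls made inside
# analyze_news above; the query values are illustrative:
#
#   def on_progress(pct: int, msg: str) -> None:
#       print(f"[{pct:3d}%] {msg}")
#
#   results = analyzer.analyze_news(
#       {"query": "Tesla", "num_articles": 10, "languages": ["English"]},
#       progress_callback=on_progress,
#   )
#   print(results["summary"]["distribution"])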

# ------------------------------------------------------------------------------
# Routes
# ------------------------------------------------------------------------------

@app.get("/health")
def health() -> Dict[str, Any]:
    return {
        "status": "ok",
        "time": datetime.utcnow().isoformat(),
        "config": load_config(),
    }


@app.get("/api/analyze", response_model=AnalyzeResponse)
def analyze_get(
    query: str = Query(..., description="Company / stock / keyword"),
    num_articles: int = Query(20, ge=5, le=50),
    languages: str = Query("English", description="Comma-separated languages"),
    include_audio: bool = Query(True),
    sentiment_models: str = Query("VADER,Loughran-McDonald,FinBERT"),
):
    req = AnalyzeRequest(
        query=query.strip(),
        num_articles=num_articles,
        languages=[x.strip() for x in languages.split(",") if x.strip()],
        include_audio=include_audio,
        sentiment_models=[x.strip() for x in sentiment_models.split(",") if x.strip()],
    )
    result = analyzer.analyze_news(req.dict())
    return AnalyzeResponse(**result)


@app.post("/api/analyze", response_model=AnalyzeResponse)
def analyze_post(payload: AnalyzeRequest):
    result = analyzer.analyze_news(payload.dict())
    return AnalyzeResponse(**result)
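
# Quick smoke tests once the server is running (URLs assume the local default
# of http://localhost:8000 set below; adjust host/port for your deployment):
#
#   curl "http://localhost:8000/health"
#   curl "http://localhost:8000/api/analyze?query=Infosys&num_articles=5&include_audio=false"
#   curl -X POST "http://localhost:8000/api/analyze" \
#        -H "Content-Type: application/json" \
#        -d '{"query": "Infosys", "num_articles": 5, "include_audio": false}'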

# Uvicorn hint (not used on Spaces; kept for local runs)
if __name__ == "__main__":
    import uvicorn

    host = os.getenv("FASTAPI_HOST", "0.0.0.0")
    port = int(os.getenv("FASTAPI_PORT", "8000"))
    uvicorn.run("api:app", host=host, port=port, reload=False)
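
# Local run (a sketch; on Spaces the platform launches the app itself):
#
#   python api.py                                 # honors FASTAPI_HOST / FASTAPI_PORT if set
#   uvicorn api:app --host 0.0.0.0 --port 8000    # equivalent, bypassing the env vars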