wekey1998 committed on
Commit c844811 · verified · Parent: 91bee6d

Update api.py

Files changed (1): api.py (+327 -311)
api.py CHANGED
@@ -1,355 +1,371 @@
- from fastapi import FastAPI, HTTPException, Query
- from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
- from typing import List, Optional, Dict, Any
- import asyncio
  import logging
  from datetime import datetime
- import json

- # Import our modules
  from scraper import NewsletterScraper
- from nlp import SentimentAnalyzer, KeywordExtractor
- from summarizer import TextSummarizer
  from translator import MultilingualTranslator
  from tts import AudioGenerator
- from utils import setup_logging, cache_results

- # Setup logging
  setup_logging()
- logger = logging.getLogger(__name__)

- # FastAPI app
  app = FastAPI(
-     title="Global Business News Intelligence API",
-     description="Advanced news analysis with sentiment, summarization, and multilingual support",
-     version="1.0.0"
  )

- # CORS middleware
  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],
-     allow_credentials=True,
      allow_methods=["*"],
      allow_headers=["*"],
  )

- class AnalysisRequest(BaseModel):
-     query: str
-     num_articles: int = 20
-     languages: List[str] = ["English"]
      include_audio: bool = True
-     sentiment_models: List[str] = ["VADER", "Loughran-McDonald", "FinBERT"]

- class AnalysisResponse(BaseModel):
      query: str
-     total_articles: int
-     processing_time: float
-     average_sentiment: float
-     sentiment_distribution: Dict[str, int]
      articles: List[Dict[str, Any]]
      keywords: List[Dict[str, Any]]
-     summary: Dict[str, Any]
      languages: List[str]
-     audio_files: Optional[Dict[str, str]] = None

  class NewsAnalyzer:
-     """Main news analysis orchestrator"""
-
-     def __init__(self):
-         self.scraper = NewsletterScraper()
-         self.sentiment_analyzer = SentimentAnalyzer()
-         self.keyword_extractor = KeywordExtractor()
-         self.summarizer = TextSummarizer()
-         self.translator = MultilingualTranslator()
-         self.audio_generator = AudioGenerator()
-
-         logger.info("NewsAnalyzer initialized successfully")
-
-     async def analyze_news_async(self, config: Dict[str, Any], progress_callback=None) -> Dict[str, Any]:
-         """Async version of analyze_news"""
-         return self.analyze_news(config, progress_callback)
-
-     def analyze_news(self, config: Dict[str, Any], progress_callback=None) -> Dict[str, Any]:
-         """Main analysis pipeline"""
          start_time = datetime.now()
-
-         try:
-             query = config['query']
-             num_articles = config.get('num_articles', 20)
-             languages = config.get('languages', ['English'])
-             include_audio = config.get('include_audio', True)
-             sentiment_models = config.get('sentiment_models', ['VADER', 'Loughran-McDonald', 'FinBERT'])
-
-             logger.info(f"Starting analysis for query: {query}")
-
-             if progress_callback:
-                 progress_callback(10, "Scraping articles...")
-
-             # Step 1: Scrape articles
-             articles = self.scraper.scrape_news(query, num_articles)
-             logger.info(f"Scraped {len(articles)} articles")
-
-             if not articles:
-                 raise ValueError("No articles found for the given query")
-
-             if progress_callback:
-                 progress_callback(30, "Analyzing sentiment...")
-
-             # Step 2: Sentiment analysis
-             for article in articles:
-                 article['sentiment'] = self.sentiment_analyzer.analyze_sentiment(
-                     article['content'],
-                     models=sentiment_models
                  )
-
-             if progress_callback:
-                 progress_callback(50, "Extracting keywords...")
-
-             # Step 3: Keyword extraction
-             all_text = ' '.join([article['content'] for article in articles])
-             keywords = self.keyword_extractor.extract_keywords(all_text)
-
-             if progress_callback:
-                 progress_callback(60, "Generating summaries...")
-
-             # Step 4: Summarization
-             for article in articles:
-                 article['summary'] = self.summarizer.summarize(article['content'])
-
-                 # Multilingual summaries
-                 if len(languages) > 1:
-                     article['summaries'] = {}
-                     for lang in languages:
-                         if lang != 'English':
-                             article['summaries'][lang] = self.translator.translate(
-                                 article['summary'],
-                                 target_lang=lang
-                             )
-                         else:
-                             article['summaries'][lang] = article['summary']
-
              if progress_callback:
-                 progress_callback(80, "Generating audio...")
-
-             # Step 5: Audio generation
-             audio_files = {}
-             if include_audio and languages:
-                 # Create overall summary for audio
-                 overall_summary = self.create_overall_summary(articles, keywords)
-
                  for lang in languages:
-                     if lang in ['English', 'Hindi', 'Tamil']:
-                         try:
-                             if lang != 'English':
-                                 summary_text = self.translator.translate(overall_summary, target_lang=lang)
-                             else:
-                                 summary_text = overall_summary
-
-                             audio_file = self.audio_generator.generate_audio(
-                                 summary_text,
-                                 language=lang,
-                                 output_file=f"summary_{lang.lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
                              )
-                             audio_files[lang] = audio_file
-                         except Exception as e:
-                             logger.error(f"Error generating audio for {lang}: {str(e)}")
-
-             if progress_callback:
-                 progress_callback(90, "Finalizing results...")
-
-             # Step 6: Calculate summary statistics
-             sentiments = [article['sentiment']['compound'] for article in articles]
-             average_sentiment = sum(sentiments) / len(sentiments) if sentiments else 0.0
-
-             sentiment_distribution = {
-                 'Positive': sum(1 for s in sentiments if s > 0.1),
-                 'Negative': sum(1 for s in sentiments if s < -0.1),
-                 'Neutral': sum(1 for s in sentiments if -0.1 <= s <= 0.1)
-             }
-
-             # Step 7: Prepare results
-             processing_time = (datetime.now() - start_time).total_seconds()
-
-             results = {
-                 'query': query,
-                 'total_articles': len(articles),
-                 'processing_time': processing_time,
-                 'average_sentiment': average_sentiment,
-                 'sentiment_distribution': sentiment_distribution,
-                 'articles': articles,
-                 'keywords': keywords,
-                 'languages': languages,
-                 'audio_files': audio_files,
-                 'summary': {
-                     'average_sentiment': average_sentiment,
-                     'total_articles': len(articles),
-                     'sources': len(set([article['source'] for article in articles])),
-                     'date_range': self.get_date_range(articles)
-                 }
-             }
-
              if progress_callback:
-                 progress_callback(100, "Analysis complete!")
-
-             logger.info(f"Analysis completed successfully in {processing_time:.2f} seconds")
-             return results
-
-         except Exception as e:
-             logger.error(f"Error in analysis pipeline: {str(e)}")
-             raise e
-
-     def create_overall_summary(self, articles: List[Dict], keywords: List[Dict]) -> str:
-         """Create an overall summary for audio generation"""
-         try:
-             # Get top keywords
-             top_keywords = [kw['keyword'] for kw in keywords[:10]]
-
-             # Calculate sentiment distribution
-             positive_count = sum(1 for article in articles if article['sentiment']['compound'] > 0.1)
-             negative_count = sum(1 for article in articles if article['sentiment']['compound'] < -0.1)
-             neutral_count = len(articles) - positive_count - negative_count
-
-             # Create summary text
-             summary = f"Analysis of {len(articles)} articles reveals "
-
-             if positive_count > negative_count:
-                 summary += f"predominantly positive sentiment with {positive_count} positive, {negative_count} negative, and {neutral_count} neutral articles. "
-             elif negative_count > positive_count:
-                 summary += f"predominantly negative sentiment with {negative_count} negative, {positive_count} positive, and {neutral_count} neutral articles. "
-             else:
-                 summary += f"mixed sentiment with balanced coverage. "
-
-             if top_keywords:
-                 summary += f"Key topics include: {', '.join(top_keywords[:5])}. "
-
-             # Add top stories
-             top_positive = sorted(articles, key=lambda x: x['sentiment']['compound'], reverse=True)[:2]
-             top_negative = sorted(articles, key=lambda x: x['sentiment']['compound'])[:2]
-
-             if top_positive[0]['sentiment']['compound'] > 0.1:
-                 summary += f"Most positive coverage: {top_positive[0]['title'][:100]}. "
-
-             if top_negative[0]['sentiment']['compound'] < -0.1:
-                 summary += f"Most concerning coverage: {top_negative[0]['title'][:100]}. "
-
-             return summary
-
-         except Exception as e:
-             logger.error(f"Error creating overall summary: {str(e)}")
-             return f"Analysis of {len(articles)} articles completed successfully."
-
-     def get_date_range(self, articles: List[Dict]) -> Dict[str, str]:
-         """Get the date range of articles"""
          try:
-             dates = [article['date'] for article in articles if 'date' in article and article['date']]
-             if dates:
-                 dates = [d for d in dates if d is not None]
-                 if dates:
-                     min_date = min(dates)
-                     max_date = max(dates)
-                     return {
-                         'start': str(min_date),
-                         'end': str(max_date)
-                     }
-             return {'start': 'Unknown', 'end': 'Unknown'}
-         except Exception as e:
-             logger.error(f"Error getting date range: {str(e)}")
-             return {'start': 'Unknown', 'end': 'Unknown'}
-
- # Initialize the analyzer
  analyzer = NewsAnalyzer()

- # FastAPI endpoints
- @app.get("/", response_model=Dict[str, str])
- async def root():
-     """API root endpoint"""
-     return {
-         "message": "Global Business News Intelligence API",
-         "version": "1.0.0",
-         "docs": "/docs"
-     }

- @app.get("/health", response_model=Dict[str, str])
- async def health_check():
-     """Health check endpoint"""
-     return {"status": "healthy", "timestamp": datetime.now().isoformat()}
-
- @app.get("/api/analyze", response_model=AnalysisResponse)
- async def analyze_news_endpoint(
-     query: str = Query(..., description="Company name, ticker, or keyword to analyze"),
-     num_articles: int = Query(20, description="Number of articles to analyze (5-50)", ge=5, le=50),
-     languages: List[str] = Query(["English"], description="Languages for summaries"),
-     include_audio: bool = Query(True, description="Generate audio summaries"),
-     sentiment_models: List[str] = Query(["VADER", "Loughran-McDonald", "FinBERT"], description="Sentiment models to use")
- ):
-     """Main analysis endpoint"""
-     try:
-         config = {
-             'query': query,
-             'num_articles': num_articles,
-             'languages': languages,
-             'include_audio': include_audio,
-             'sentiment_models': sentiment_models
-         }
-
-         results = await analyzer.analyze_news_async(config)
-
-         return AnalysisResponse(**results)
-
-     except Exception as e:
-         logger.error(f"Error in analyze endpoint: {str(e)}")
-         raise HTTPException(status_code=500, detail=str(e))
-
- @app.post("/api/analyze", response_model=AnalysisResponse)
- async def analyze_news_post(request: AnalysisRequest):
-     """POST version of analysis endpoint"""
-     try:
-         config = request.dict()
-         results = await analyzer.analyze_news_async(config)
-         return AnalysisResponse(**results)
-
-     except Exception as e:
-         logger.error(f"Error in analyze POST endpoint: {str(e)}")
-         raise HTTPException(status_code=500, detail=str(e))
-
- @app.get("/api/sources", response_model=List[str])
- async def get_available_sources():
-     """Get list of available news sources"""
-     return analyzer.scraper.get_available_sources()
-
- @app.get("/api/models", response_model=Dict[str, List[str]])
- async def get_available_models():
-     """Get list of available models"""
      return {
-         "sentiment_models": ["VADER", "Loughran-McDonald", "FinBERT"],
-         "summarization_models": ["distilbart-cnn-12-6"],
-         "translation_models": ["Helsinki-NLP/opus-mt-en-hi", "Helsinki-NLP/opus-mt-en-fi"],
-         "audio_languages": ["English", "Hindi", "Tamil"]
      }

- @app.get("/api/keywords/{query}", response_model=List[Dict[str, Any]])
- async def extract_keywords_endpoint(
-     query: str,
-     num_keywords: int = Query(20, description="Number of keywords to extract", ge=5, le=50)
  ):
-     """Extract keywords from a query or text"""
-     try:
-         # For demo purposes, we'll scrape a few articles and extract keywords
-         articles = analyzer.scraper.scrape_news(query, 5)
-         if not articles:
-             raise HTTPException(status_code=404, detail="No articles found for query")
-
-         all_text = ' '.join([article['content'] for article in articles])
-         keywords = analyzer.keyword_extractor.extract_keywords(all_text, num_keywords=num_keywords)
-
-         return keywords
-
-     except Exception as e:
-         logger.error(f"Error in keywords endpoint: {str(e)}")
-         raise HTTPException(status_code=500, detail=str(e))

  if __name__ == "__main__":
      import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8000)
+ # api.py
+ """
+ FastAPI backend for the News Sentiment Analyzer.
+ - Orchestrates scraping, NLP, summarization, translation, and TTS.
+ - Safe for Hugging Face Spaces (CPU-only, lazy model loading, CORS open).
+ """
+
+ from __future__ import annotations
+
+ import os
+ import json
  import logging
  from datetime import datetime
+ from typing import Any, Dict, List, Optional

+ from fastapi import FastAPI, Query
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
+
+ # Local modules
+ from utils import (
+     setup_logging,
+     load_config,
+     calculate_processing_stats,
+     calculate_sentiment_distribution,
+ )
  from scraper import NewsletterScraper
+ from summarizer import TextSummarizer, extract_key_sentences
  from translator import MultilingualTranslator
  from tts import AudioGenerator
+ from nlp import SentimentAnalyzer, KeywordExtractor, TextProcessor  # provided in your repo
+
+ # ------------------------------------------------------------------------------
+ # Init
+ # ------------------------------------------------------------------------------

  setup_logging()
+ logger = logging.getLogger("api")

  app = FastAPI(
+     title="News Intelligence API",
+     version="1.0.0",
+     description="Backend for News Sentiment Analyzer (Hugging Face deploy-ready)",
  )

+ # Hugging Face Spaces often runs UI + API from the same origin,
+ # but open CORS to keep it simple for local/dev and Space builds.
  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],
      allow_methods=["*"],
      allow_headers=["*"],
  )

+
+ # ------------------------------------------------------------------------------
+ # Pydantic models
+ # ------------------------------------------------------------------------------
+
+ class AnalyzeRequest(BaseModel):
+     query: str = Field(..., description="Company / stock / keyword to analyze")
+     num_articles: int = Field(20, ge=5, le=50, description="Number of articles (5-50)")
+     languages: List[str] = Field(default_factory=lambda: ["English"])
      include_audio: bool = True
+     sentiment_models: List[str] = Field(
+         default_factory=lambda: ["VADER", "Loughran-McDonald", "FinBERT"]
+     )
+

+ class AnalyzeResponse(BaseModel):
      query: str
+     summary: Dict[str, Any]
      articles: List[Dict[str, Any]]
      keywords: List[Dict[str, Any]]
+     audio_files: Optional[Dict[str, Optional[str]]] = None
      languages: List[str]
+     config: Dict[str, Any]
+
+
+ # ------------------------------------------------------------------------------
+ # Core Orchestrator
+ # ------------------------------------------------------------------------------

  class NewsAnalyzer:
+     """
+     All heavy components are created lazily to avoid high cold-start memory usage
+     and to play nice with Hugging Face CPU-only Spaces.
+     """
+
+     def __init__(self) -> None:
+         self._cfg = load_config()
+
+         self._scraper: Optional[NewsletterScraper] = None
+         self._summarizer: Optional[TextSummarizer] = None
+         self._translator: Optional[MultilingualTranslator] = None
+         self._audio: Optional[AudioGenerator] = None
+         self._sentiment: Optional[SentimentAnalyzer] = None
+         self._keywords: Optional[KeywordExtractor] = None
+         self._textproc: Optional[TextProcessor] = None
+
+         logger.info("NewsAnalyzer initialized with lazy components.")
+
+     # --- Lazy props -----------------------------------------------------------
+
+     @property
+     def scraper(self) -> NewsletterScraper:
+         if self._scraper is None:
+             self._scraper = NewsletterScraper()
+         return self._scraper
+
+     @property
+     def summarizer(self) -> TextSummarizer:
+         if self._summarizer is None:
+             self._summarizer = TextSummarizer()
+         return self._summarizer
+
+     @property
+     def translator(self) -> MultilingualTranslator:
+         if self._translator is None:
+             self._translator = MultilingualTranslator()
+         return self._translator
+
+     @property
+     def audio(self) -> AudioGenerator:
+         if self._audio is None:
+             self._audio = AudioGenerator()
+         return self._audio
+
+     @property
+     def sentiment(self) -> SentimentAnalyzer:
+         if self._sentiment is None:
+             self._sentiment = SentimentAnalyzer()
+         return self._sentiment
+
+     @property
+     def keyword_extractor(self) -> KeywordExtractor:
+         if self._keywords is None:
+             self._keywords = KeywordExtractor()
+         return self._keywords
+
+     @property
+     def textproc(self) -> TextProcessor:
+         if self._textproc is None:
+             self._textproc = TextProcessor()
+         return self._textproc
+
+     # --- Pipeline -------------------------------------------------------------
+
+     def analyze_news(
+         self,
+         config: Dict[str, Any],
+         progress_callback=None,
+     ) -> Dict[str, Any]:
+         """
+         Synchronous pipeline used by the Streamlit UI.
+         (The FastAPI endpoints wrap it synchronously as well.)
+         """
          start_time = datetime.now()
+         query: str = config["query"].strip()
+         num_articles: int = int(config.get("num_articles", 20))
+         languages: List[str] = config.get("languages", ["English"]) or ["English"]
+         include_audio: bool = bool(config.get("include_audio", True))
+         sentiment_models: List[str] = config.get(
+             "sentiment_models", ["VADER", "Loughran-McDonald", "FinBERT"]
+         )
+
+         if progress_callback:
+             progress_callback(5, "Initializing pipeline...")
+
+         # --- Step 1: Scrape ---------------------------------------------------
+         if progress_callback:
+             progress_callback(10, "Scraping articles...")
+         articles = self.scraper.scrape_news(query, max_articles=num_articles)
+
+         if not articles:
+             # Return a graceful empty response rather than raising
+             return {
+                 "query": query,
+                 "summary": {
+                     "average_sentiment": 0.0,
+                     "distribution": {"positive": 0, "negative": 0, "neutral": 0, "total": 0},
+                     "processing": calculate_processing_stats(start_time, 0),
+                 },
+                 "articles": [],
+                 "keywords": [],
+                 "audio_files": {},
+                 "languages": languages,
+                 "config": config,
+             }
+
+         # Ensure 'content' is present
+         for a in articles:
+             if not a.get("content"):
+                 a["content"] = a.get("summary") or a.get("title") or ""
+
+         # --- Step 2: Sentiment ------------------------------------------------
+         if progress_callback:
+             progress_callback(30, "Analyzing sentiment...")
+         for a in articles:
+             try:
+                 a["sentiment"] = self.sentiment.analyze_sentiment(
+                     a["content"], models=sentiment_models
                  )
+             except Exception as e:
+                 logger.exception(f"Sentiment failed for '{a.get('title', '')[:60]}': {e}")
+                 a["sentiment"] = {"compound": 0.0}
+
+         # --- Step 3: Summaries ------------------------------------------------
+         if progress_callback:
+             progress_callback(50, "Generating summaries...")
+         for a in articles:
+             try:
+                 a["summary"] = self.summarizer.summarize(a["content"])
+             except Exception as e:
+                 logger.exception(f"Summarization failed: {e}")
+                 a["summary"] = self.textproc.clean_text(a["content"])[:300] + "..."
+
+         # --- Step 4: Multilingual summaries ----------------------------------
+         if len(languages) > 1:
              if progress_callback:
+                 progress_callback(60, "Translating summaries...")
+             for a in articles:
+                 a["summaries"] = {}
                  for lang in languages:
+                     try:
+                         if lang == "English":
+                             a["summaries"][lang] = a["summary"]
+                         else:
+                             a["summaries"][lang] = self.translator.translate(
+                                 a["summary"], target_lang=lang, source_lang="English"
                              )
+                     except Exception as e:
+                         logger.exception(f"Translation failed ({lang}): {e}")
+                         a["summaries"][lang] = a["summary"]
+
+         # --- Step 5: Keywords (YAKE) -----------------------------------------
+         if progress_callback:
+             progress_callback(70, "Extracting keywords...")
+         joined = " ".join(a.get("content", "") for a in articles)
+         keywords = self.keyword_extractor.extract_keywords(joined) if joined else []
+
+         # --- Step 6: Optional audio -------------------------------------------
+         audio_files: Dict[str, Optional[str]] = {}
+         if include_audio and languages:
              if progress_callback:
+                 progress_callback(80, "Creating audio summaries...")
+             overall_summary = self._overall_summary_text(articles, keywords)
+             for lang in languages:
+                 try:
+                     summary_text = (
+                         self.translator.translate(overall_summary, target_lang=lang)
+                         if lang != "English"
+                         else overall_summary
+                     )
+                     audio_files[lang] = self.audio.generate_audio(
+                         summary_text,
+                         language=lang,
+                         output_file=f"summary_{lang.lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3",
+                     )
+                 except Exception as e:
+                     logger.exception(f"Audio failed ({lang}): {e}")
+                     audio_files[lang] = None
+
+         # --- Summary stats ----------------------------------------------------
+         if progress_callback:
+             progress_callback(90, "Finalizing results...")
+         dist = calculate_sentiment_distribution(articles)
+         processing = calculate_processing_stats(start_time, len(articles))
+
+         results: Dict[str, Any] = {
+             "query": query,
+             "summary": {
+                 "average_sentiment": dist.get("average_sentiment", 0.0),
+                 "distribution": dist,
+                 "processing": processing,
+                 "top_sentences": extract_key_sentences(joined, num_sentences=3),
+             },
+             "articles": articles,
+             "keywords": keywords,
+             "audio_files": audio_files,
+             "languages": languages,
+             "config": config,
+         }
+
+         if progress_callback:
+             progress_callback(100, "Done.")
+         return results
+
+     # Helpers -----------------------------------------------------------------
+
+     def _overall_summary_text(self, articles: List[Dict[str, Any]], keywords: List[Dict[str, Any]]) -> str:
+         """Create a concise, human-friendly overall summary to read out in audio."""
+         pos = sum(1 for a in articles if a.get("sentiment", {}).get("compound", 0) > 0.1)
+         neg = sum(1 for a in articles if a.get("sentiment", {}).get("compound", 0) < -0.1)
+         neu = len(articles) - pos - neg
+
+         top_kw = ", ".join(kw["keyword"] for kw in keywords[:8]) if keywords else ""
+
+         latest_title = ""
          try:
+             latest = sorted(
+                 [a for a in articles if a.get("date")],
+                 key=lambda x: x.get("date"),
+                 reverse=True,
+             )
+             if latest:
+                 latest_title = latest[0].get("title", "")[:120]
+         except Exception:
+             pass
+
+         parts = [
+             f"News analysis summary for {len(articles)} articles.",
+             f"Overall sentiment: {pos} positive, {neg} negative, and {neu} neutral articles.",
+         ]
+         if latest_title:
+             parts.append(f"Latest development: {latest_title}.")
+         if top_kw:
+             parts.append(f"Top themes include: {top_kw}.")
+         parts.append("This concludes the summary.")
+         return " ".join(parts)
+
+
+ # Single global analyzer (works fine for Spaces + Streamlit)
  analyzer = NewsAnalyzer()


+ # ------------------------------------------------------------------------------
+ # Routes
+ # ------------------------------------------------------------------------------
+
+ @app.get("/health")
+ def health() -> Dict[str, Any]:
      return {
+         "status": "ok",
+         "time": datetime.utcnow().isoformat(),
+         "config": load_config(),
      }

+
+ @app.get("/api/analyze", response_model=AnalyzeResponse)
+ def analyze_get(
+     query: str = Query(..., description="Company / stock / keyword"),
+     num_articles: int = Query(20, ge=5, le=50),
+     languages: str = Query("English", description="Comma-separated languages"),
+     include_audio: bool = Query(True),
+     sentiment_models: str = Query("VADER,Loughran-McDonald,FinBERT"),
  ):
+     req = AnalyzeRequest(
+         query=query.strip(),
+         num_articles=num_articles,
+         languages=[x.strip() for x in languages.split(",") if x.strip()],
+         include_audio=include_audio,
+         sentiment_models=[x.strip() for x in sentiment_models.split(",") if x.strip()],
+     )
+     result = analyzer.analyze_news(req.dict())
+     return AnalyzeResponse(**result)

+
+ @app.post("/api/analyze", response_model=AnalyzeResponse)
+ def analyze_post(payload: AnalyzeRequest):
+     result = analyzer.analyze_news(payload.dict())
+     return AnalyzeResponse(**result)
+
+
+ # Uvicorn hint (not used on Spaces; kept for local runs)
  if __name__ == "__main__":
      import uvicorn
+
+     host = os.getenv("FASTAPI_HOST", "0.0.0.0")
+     port = int(os.getenv("FASTAPI_PORT", "8000"))
+     uvicorn.run("api:app", host=host, port=port, reload=False)
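For quick verification of the endpoints added in this commit, a minimal client sketch follows. It assumes the server is running locally via "python api.py" on port 8000 and that the requests package is installed; the query value, file name, and timeouts are illustrative, not part of the commit:

# client_example.py -- hypothetical helper, not included in this commit
import requests

BASE = "http://localhost:8000"

# Health check also echoes the loaded config.
print(requests.get(f"{BASE}/health", timeout=30).json()["status"])

# GET variant: languages and sentiment_models are comma-separated strings.
resp = requests.get(
    f"{BASE}/api/analyze",
    params={
        "query": "Tesla",
        "num_articles": 5,
        "languages": "English,Hindi",
        "include_audio": False,
        "sentiment_models": "VADER,FinBERT",
    },
    timeout=600,  # scraping plus CPU-only models can be slow
)
resp.raise_for_status()
data = resp.json()
print(data["summary"]["distribution"], len(data["articles"]))

# POST variant takes the same fields as a JSON body (AnalyzeRequest).
resp = requests.post(
    f"{BASE}/api/analyze",
    json={"query": "Tesla", "num_articles": 5, "include_audio": False},
    timeout=600,
)
print(resp.json()["summary"]["average_sentiment"])

Note that with an empty scrape result the API now returns a zeroed summary instead of a 500, so the GET call above succeeds even when no articles are found.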