"""News Analyze API.

FastAPI service that picks one pending news item from Supabase, rewrites it
(directly through a local ``inference.py`` module, or through an HTTP rewrite
API as a fallback), stores the rewritten fields back in Supabase and builds a
poster URL for the item.
"""
import os
import sys
import importlib.util
from pathlib import Path
import re
import json
import time
import logging
import gc
import asyncio
import aiohttp
from typing import Optional, Dict, Any
from fastapi import FastAPI, APIRouter, HTTPException
from pydantic import BaseModel
from urllib.parse import quote

# IMPORTANT: set the environment variables and PyTorch options BEFORE any
# import that uses PyTorch.
# TOKENIZERS_PARALLELISM is set to "false"; OMP and MKL are capped at 2 threads.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["MKL_NUM_THREADS"] = "2"

# Configure PyTorch BEFORE importing any module that uses it.
import torch
torch.set_num_threads(2)

# Check whether this was already configured before trying to set interop threads.
# `_interop_threads_set` is an ad-hoc marker attribute that this file attaches
# to the torch module just below, so the call is attempted only once.
if not hasattr(torch, '_interop_threads_set'):
    try:
        torch.set_num_interop_threads(1)
        torch._interop_threads_set = True
    except RuntimeError as e:
        # Only the "cannot set number of interop threads" error is tolerated
        # (reported with a warning); any other RuntimeError propagates.
        if "cannot set number of interop threads" in str(e):
            print(f"Warning: Could not set interop threads: {e}")
        else:
            raise e

# Supabase configuration.
# The project URL is hard-coded; both API keys are read from the environment.
SUPABASE_URL = "https://iiwbixdrrhejkthxygak.supabase.co"
SUPABASE_KEY = os.getenv("SUPA_KEY")
SUPABASE_ROLE_KEY = os.getenv("SUPA_SERVICE_KEY")
# Fail fast at import time when either key is missing or empty.
if not SUPABASE_KEY or not SUPABASE_ROLE_KEY:
    raise ValueError("❌ SUPA_KEY or SUPA_SERVICE_KEY not set in environment!")
# Headers built from SUPA_KEY; sent with the read query in
# fetch_brazil_interest_news().
SUPABASE_HEADERS = {
    "apikey": SUPABASE_KEY,
    "Authorization": f"Bearer {SUPABASE_KEY}",
    "Content-Type": "application/json"
}
# Headers built from SUPA_SERVICE_KEY; sent with the PATCH in
# update_news_rewrite().
SUPABASE_ROLE_HEADERS = {
    "apikey": SUPABASE_ROLE_KEY,
    "Authorization": f"Bearer {SUPABASE_ROLE_KEY}",
    "Content-Type": "application/json"
}

# Rewrite API URL: the endpoint rewrite_article_http() posts the article to.
REWRITE_API_URL = "https://habulaj-newapi-clone.hf.space/rewrite-news"

# Root logging setup (INFO level, timestamped) and the logger used by this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("news-analyze-api")

# Shared aiohttp session: created lazily by get_http_session() and closed in
# shutdown_event().
http_session = None

async def get_http_session():
    """Return the shared aiohttp session, creating it on first use.

    The session lives in the module-level ``http_session`` global, so every
    caller reuses the same connector. It is created with a 30 s total / 5 s
    connect timeout and a fixed User-Agent header.
    """
    global http_session

    # Already created by an earlier call: hand it back as is.
    if http_session is not None:
        return http_session

    connector_options = dict(
        limit=20,
        limit_per_host=10,
        ttl_dns_cache=300,
        use_dns_cache=True,
        keepalive_timeout=30,
        enable_cleanup_closed=True,
    )
    http_session = aiohttp.ClientSession(
        connector=aiohttp.TCPConnector(**connector_options),
        timeout=aiohttp.ClientTimeout(total=30, connect=5),
        headers={'User-Agent': 'NewsAnalyzeAPI/1.0 (https://example.com/contact)'},
    )
    return http_session

def load_inference_module():
    """Load ``inference.py`` dynamically and return it as a module object.

    The file is looked up, in order, next to this module, in this module's
    parent directory, in the current working directory and in its parent.
    Any failure (file not found, error while executing the module) is logged
    and ``None`` is returned instead of raising.
    """
    try:
        module_dir = Path(__file__).parent
        # First existing candidate wins; the order matters.
        candidates = (
            module_dir / "inference.py",
            module_dir.parent / "inference.py",
            Path("./inference.py"),
            Path("../inference.py"),
        )
        inference_path = next((path for path in candidates if path.exists()), None)
        if inference_path is None:
            raise FileNotFoundError("inference.py não encontrado")

        spec = importlib.util.spec_from_file_location("inference", inference_path)
        loaded = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(loaded)
    except Exception as e:
        log.error(f"Erro ao carregar inference.py: {str(e)}")
        return None

    return loaded

# Load the module once at import time; the value is None when loading failed.
inference_module = load_inference_module()

async def rewrite_article_direct(content: str) -> Optional[Dict[str, Any]]:
    """Rewrite the article by calling ``rewrite_news`` from the loaded inference module.

    Falls back to ``rewrite_article_http`` when the inference module was not
    loaded or when anything raises during the direct call.

    Returns:
        A result dict with ``success``, ``data``, ``raw_response``,
        ``status_code`` and ``method`` ("direct_call"); when one of the
        required fields is empty it also carries ``error`` and
        ``missing_keys``. On fallback, whatever ``rewrite_article_http``
        returns.
    """
    try:
        if not inference_module:
            log.error("Módulo inference não carregado, fallback para API HTTP")
            return await rewrite_article_http(content)

        log.info(f"Reescrevendo artigo diretamente: {len(content)} caracteres")

        # Minimal stand-in for the request object: only `.content` is set here.
        class NewsRequest:
            def __init__(self, content: str):
                self.content = content

        result = await inference_module.rewrite_news(NewsRequest(content))

        required_keys = ["title", "subhead", "content", "title_instagram", "content_instagram"]

        # The first three attributes must exist on the result; the Instagram
        # ones default to an empty string when absent.
        rewritten_data = {field: getattr(result, field) for field in ("title", "subhead", "content")}
        rewritten_data["title_instagram"] = getattr(result, "title_instagram", "")
        rewritten_data["content_instagram"] = getattr(result, "content_instagram", "")

        # Basic validation: every required field must be non-blank.
        if not all(rewritten_data[key].strip() for key in required_keys):
            log.error("Resposta da reescrita direta incompleta")
            return {
                "success": False,
                "error": "Resposta incompleta",
                "data": rewritten_data,
                "raw_response": str(rewritten_data),
                "status_code": 200,
                "method": "direct_call",
                "missing_keys": [key for key in required_keys if not rewritten_data.get(key, "").strip()]
            }

        log.info("Artigo reescrito com sucesso (chamada direta)")
        return {
            "success": True,
            "data": rewritten_data,
            "raw_response": str(rewritten_data),
            "status_code": 200,
            "method": "direct_call"
        }

    except Exception as e:
        log.error(f"Erro na reescrita direta: {str(e)}")
        log.info("Tentando fallback para API HTTP")
        return await rewrite_article_http(content)

async def rewrite_article_http(content: str) -> Optional[Dict[str, Any]]:
    """Rewrite the article through the HTTP rewrite API (the original code path).

    POSTs ``{"content": content}`` to ``REWRITE_API_URL`` with a 120 s total /
    10 s connect timeout. Every ``Exception`` is turned into a failure dict
    rather than propagated.

    Returns:
        A dict with ``success``, ``raw_response``, ``status_code`` and
        ``method`` ("http_call"). Successful and incomplete responses also
        carry ``data``; failures carry ``error``; incomplete responses carry
        ``missing_keys``. ``status_code`` is 0 for timeouts and other
        exceptions.
    """
    try:
        session = await get_http_session()

        log.info(f"Enviando artigo para reescrita (HTTP): {len(content)} caracteres")

        async with session.post(
            REWRITE_API_URL,
            json={"content": content},
            headers={"Content-Type": "application/json"},
            # Larger timeout than the session default: 2 minutes in total.
            timeout=aiohttp.ClientTimeout(total=120, connect=10),
        ) as response:
            log.info(f"Status da resposta HTTP: {response.status}")

            # Capture and log the full response body.
            response_text = await response.text()
            log.info(f"Body completo da resposta HTTP: {response_text}")

            if response.status != 200:
                log.error(f"Erro na API HTTP: {response.status}")
                return {
                    "success": False,
                    "error": f"HTTP {response.status}",
                    "raw_response": response_text,
                    "status_code": response.status,
                    "method": "http_call"
                }

            try:
                rewritten_data = json.loads(response_text)
            except json.JSONDecodeError as e:
                log.error(f"Erro ao fazer parse do JSON: {str(e)}")
                return {
                    "success": False,
                    "error": f"JSON inválido: {str(e)}",
                    "raw_response": response_text,
                    "status_code": response.status,
                    "method": "http_call"
                }

            # Basic validation: only the presence of each key is checked here.
            required_keys = ["title", "subhead", "content", "title_instagram", "content_instagram"]
            absent = [key for key in required_keys if key not in rewritten_data]
            if absent:
                log.error(f"Resposta HTTP incompleta. Chaves encontradas: {list(rewritten_data.keys())}")
                return {
                    "success": False,
                    "error": "Resposta incompleta",
                    "data": rewritten_data,
                    "raw_response": response_text,
                    "status_code": response.status,
                    "method": "http_call",
                    "missing_keys": absent
                }

            log.info("Artigo reescrito com sucesso (HTTP)")
            return {
                "success": True,
                "data": rewritten_data,
                "raw_response": response_text,
                "status_code": response.status,
                "method": "http_call"
            }

    except asyncio.TimeoutError:
        log.error("Timeout na API HTTP")
        return {
            "success": False,
            "error": "Timeout",
            "raw_response": "Timeout occurred",
            "status_code": 0,
            "method": "http_call"
        }
    except Exception as e:
        log.error(f"Erro na API HTTP: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "raw_response": "Exception occurred",
            "status_code": 0,
            "method": "http_call"
        }

async def rewrite_article(content: str) -> Optional[Dict[str, Any]]:
    """Rewrite the article: direct call first, HTTP API second.

    The HTTP API is tried here only when the direct attempt produced nothing,
    or produced a failed result whose ``method`` is "direct_call". A failed
    result with any other ``method`` is returned as is, without a second HTTP
    request.
    """
    outcome = await rewrite_article_direct(content)

    direct_call_failed = bool(outcome) and (
        outcome.get("method") == "direct_call" and not outcome.get("success")
    )
    if outcome and not direct_call_failed:
        return outcome

    log.info("Chamada direta falhou, tentando API HTTP")
    return await rewrite_article_http(content)

async def fetch_brazil_interest_news() -> Dict[str, Any]:
    """Fetch the oldest news row with brazil_interest=true and a null title_pt.

    Queries the Supabase REST ``news`` table with ``limit=1`` ordered by
    ``created_at`` ascending, using ``SUPABASE_HEADERS``.

    Returns:
        The first row of the response, as decoded from JSON.

    Raises:
        HTTPException: 404 when no row matches; 500 when Supabase answers with
            a non-200 status or when the request itself fails.
    """
    try:
        session = await get_http_session()
        url = f"{SUPABASE_URL}/rest/v1/news"
        params = {
            "brazil_interest": "eq.true",
            "title_pt": "is.null",
            "limit": "1",
            "order": "created_at.asc"
        }

        async with session.get(url, headers=SUPABASE_HEADERS, params=params) as response:
            if response.status != 200:
                raise HTTPException(status_code=500, detail="Erro ao buscar notícia")

            data = await response.json()
            if not data:
                raise HTTPException(status_code=404, detail="Nenhuma notícia com brazil_interest=true e title_pt vazio disponível")

            return data[0]
    except HTTPException:
        # Let our own HTTP errors through untouched; without this clause the
        # generic handler below converted the 404 into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro Supabase: {str(e)}") from e

async def update_news_rewrite(news_id: int, rewritten_data: Dict[str, str]) -> None:
    """Store the rewritten fields, including the Instagram ones, on a news row.

    Sends a PATCH to the Supabase REST ``news`` table filtered by
    ``id=eq.<news_id>`` using ``SUPABASE_ROLE_HEADERS``. Keys missing from
    ``rewritten_data`` are written as empty strings.

    Args:
        news_id: Value matched against the row's ``id`` column.
        rewritten_data: Dict read for ``title``, ``content``, ``subhead``,
            ``title_instagram`` and ``content_instagram``.

    Raises:
        HTTPException: 500 when Supabase answers with a status other than
            200/201/204 or when the request itself fails.
    """
    try:
        session = await get_http_session()
        url = f"{SUPABASE_URL}/rest/v1/news"
        params = {"id": f"eq.{news_id}"}

        payload = {
            "title_pt": rewritten_data.get("title", ""),
            "text_pt": rewritten_data.get("content", ""),
            "subhead_pt": rewritten_data.get("subhead", ""),
            "title_instagram": rewritten_data.get("title_instagram", ""),
            "content_instagram": rewritten_data.get("content_instagram", "")
        }

        async with session.patch(url, headers=SUPABASE_ROLE_HEADERS, json=payload, params=params) as response:
            if response.status not in (200, 201, 204):
                response_text = await response.text()
                log.error(f"Erro ao atualizar notícia - Status: {response.status}, Response: {response_text}")
                raise HTTPException(status_code=500, detail=f"Erro ao atualizar notícia - Status: {response.status}")

            log.info(f"Notícia {news_id} atualizada com sucesso - Status: {response.status}")

    except HTTPException:
        # Already logged above with the Supabase status; re-raise unchanged so
        # it is not logged twice and its detail is not nested in a second one.
        raise
    except Exception as e:
        log.error(f"Erro ao atualizar notícia {news_id}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Erro ao atualizar: {str(e)}") from e

def fix_wikipedia_image_url(url: str) -> str:
    """Turn a protocol-relative Wikimedia upload URL into an https URL of the original file.

    Thumbnail URLs have the form
    ``//upload.wikimedia.org/<path>/thumb/<a>/<ab>/<File>/<N>px-<File>``.
    The original file is obtained by removing the ``/thumb`` segment and the
    trailing sized rendition. URLs without a ``/thumb/`` segment already point
    at the original file and only get the ``https:`` scheme.

    Args:
        url: URL to fix. Empty values and URLs that do not start with
            ``//upload.wikimedia.org`` are returned unchanged.

    Returns:
        The https URL of the original file, or ``url`` unchanged when it is
        not a protocol-relative Wikimedia upload URL.
    """
    if not url or not url.startswith('//upload.wikimedia.org'):
        return url

    url = 'https:' + url

    prefix, marker, tail = url.partition('/thumb/')
    if not marker:
        # Not a thumbnail: nothing to strip. Dropping path segments here would
        # corrupt the URL.
        return url

    segments = tail.split('/')
    if len(segments) < 2:
        # Unexpected shape (no rendition after the file name): leave it alone.
        return url

    # The last segment is the sized rendition ("200px-Foo.jpg", or
    # "200px-Foo.svg.png" for SVGs); the one before it is the real file name.
    return prefix + '/' + '/'.join(segments[:-1])

def extract_birth_death_years(
    description: str,
    default_death_year: Optional[int] = None,
) -> tuple[Optional[int], Optional[int]]:
    """Extract birth and death years from a parenthesised span in a description.

    Recognises ``(1950–2020)``, ``(1950-2020)``, ``(1950)`` and
    ``(born 1950)``.

    Args:
        description: Text to search; only the first match is used.
        default_death_year: Year reported as the death year when the
            description gives only a birth year. Defaults to the current
            calendar year (previously a hard-coded 2025).

    Returns:
        ``(birth_year, death_year)``, or ``(None, None)`` when the description
        is empty or contains no matching span.
    """
    if not description:
        return None, None

    match = re.search(r'\((?:born\s+)?(\d{4})(?:[–-](\d{4}))?\)', description)
    if not match:
        return None, None

    birth_year = int(match.group(1))
    if match.group(2):
        return birth_year, int(match.group(2))

    if default_death_year is None:
        # Local import: keeps this function self-contained in the file.
        from datetime import date
        default_death_year = date.today().year

    return birth_year, default_death_year

async def fetch_wikipedia_info(entity_name: str) -> Optional[Dict[str, Any]]:
    """Look up an entity through the English Wikipedia title-search endpoint.

    Only the first search hit is used.

    Returns:
        A dict with ``title``, ``birth_year``, ``death_year`` (both parsed
        from the hit's description) and ``image_url`` (the hit's thumbnail
        passed through ``fix_wikipedia_image_url``). ``None`` when the
        response status is not 200, when there are no hits, or when anything
        raises (the error is logged).
    """
    try:
        session = await get_http_session()
        search_url = "https://en.wikipedia.org/w/rest.php/v1/search/title"

        async with session.get(search_url, params={'q': entity_name, 'limit': 1}) as response:
            if response.status != 200:
                return None

            data = await response.json()
            hits = data.get('pages')
            if not hits:
                return None

            top_hit = hits[0]
            thumbnail = top_hit.get('thumbnail', {})
            birth_year, death_year = extract_birth_death_years(top_hit.get('description', ''))

            # The thumbnail entry may be missing or falsy; only fix a real URL.
            raw_image = thumbnail.get('url', '') if thumbnail else ''
            image_url = fix_wikipedia_image_url(raw_image) if raw_image else raw_image

            return {
                'title': top_hit.get('title', ''),
                'birth_year': birth_year,
                'death_year': death_year,
                'image_url': image_url
            }

    except Exception as e:
        log.error(f"Erro ao buscar Wikipedia: {str(e)}")
        return None

def generate_poster_url(name: str, birth: int, death: int, image_url: str) -> str:
    """Build the "memoriam" cover URL for a person.

    ``image_url`` and ``name`` are percent-encoded with ``quote``; ``birth``
    and ``death`` are inserted as they are.
    """
    query = "&".join((
        f"image_url={quote(image_url)}",
        f"name={quote(name)}",
        f"birth={birth}",
        f"death={death}",
    ))
    return f"https://habulaj-newapi-clone.hf.space/cover/memoriam?{query}"

def generate_news_poster_url(image_url: str, headline: str) -> str:
    """Build the cover URL for a regular (non-death) news item.

    Both ``image_url`` and ``headline`` are percent-encoded with ``quote``.
    """
    query = "&".join((
        f"image_url={quote(image_url)}",
        f"headline={quote(headline)}",
    ))
    return f"https://habulaj-newapi-clone.hf.space/cover/news?{query}"

async def generate_poster_analysis(news_data: Dict[str, Any], rewritten_result: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Build the poster information for a news item.

    For a death-related item with an entity name, the entity is looked up on
    Wikipedia and a "memoriam" poster is generated when both birth and death
    years are available. Otherwise, if the item has an image, a regular news
    poster is generated.

    Args:
        news_data: News row; read for ``image``, ``death_related``,
            ``entity_name`` and ``title_en``.
        rewritten_result: Optional rewrite result; its ``data["title"]`` is
            used as the headline when the rewrite succeeded.

    Returns:
        A dict that may contain ``wikipedia_info`` and ``poster``. An empty
        dict when anything raises (the error is logged).
    """
    try:
        result = {}
        # The key may be present with a None value, so `.get(key, "")` alone
        # is not enough to guarantee a string.
        image_url = news_data.get("image") or ""

        # Death-related item: try the memoriam poster first.
        if news_data.get("death_related") is True and news_data.get("entity_name"):
            wikipedia_info = await fetch_wikipedia_info(news_data["entity_name"])

            if wikipedia_info:
                result["wikipedia_info"] = wikipedia_info

                # Memoriam poster only when both years are available.
                if (wikipedia_info.get("death_year") and
                    wikipedia_info.get("birth_year")):

                    poster_url = generate_poster_url(
                        wikipedia_info["title"],
                        wikipedia_info["birth_year"],
                        wikipedia_info["death_year"],
                        # fetch_wikipedia_info always sets `image_url`
                        # (possibly to ""), so a `.get()` default never
                        # applied; `or` falls back to the news image.
                        wikipedia_info.get("image_url") or image_url
                    )
                    result["poster"] = poster_url

        # No memoriam poster was produced: fall back to the regular news poster.
        if "poster" not in result and image_url:
            # Prefer the rewritten headline; otherwise use the original title.
            headline_to_use = news_data.get("title_en") or ""
            if (rewritten_result and
                rewritten_result.get("success") and
                rewritten_result.get("data") and
                rewritten_result["data"].get("title")):
                headline_to_use = rewritten_result["data"]["title"]

            news_poster_url = generate_news_poster_url(image_url, headline_to_use)
            result["poster"] = news_poster_url

        return result

    except Exception as e:
        log.error(f"Erro ao gerar poster: {str(e)}")
        return {}

# FastAPI application and the router on which the /analyze endpoint is declared.
app = FastAPI(title="News Analyze API")
router = APIRouter()

@router.post("/analyze")
async def analyze_endpoint():
    """Process one pending news item: rewrite it, store the rewrite, build a poster.

    Steps: fetch a row with brazil_interest=true and null title_pt, rewrite
    ``text_en`` (direct call first, HTTP second), update the row when the
    rewrite succeeded, then generate the poster information.

    Returns:
        A dict with ``news_id``, ``title_en``, ``text_en``,
        ``rewrite_result``, ``death_related``, ``entity_name``,
        ``entity_type`` and ``image``, plus whatever
        ``generate_poster_analysis`` produced (``wikipedia_info``,
        ``poster``).

    Raises:
        HTTPException: 400 when ``title_en`` or ``text_en`` is missing or
            blank; errors raised by the fetch and update helpers propagate.
    """
    # Fetch a news item with brazil_interest=true and an empty title_pt.
    news_data = await fetch_brazil_interest_news()

    # The keys may be present with a None value (e.g. a null column), in which
    # case `.get(key, "")` returns None and `.strip()` would raise instead of
    # producing the 400 below.
    title_en = news_data.get("title_en") or ""
    text_en = news_data.get("text_en") or ""
    news_id = news_data.get("id")

    if not title_en.strip() or not text_en.strip():
        raise HTTPException(status_code=400, detail="Title_en and text_en must not be empty.")

    # Run the rewrite (direct call first, then HTTP).
    rewritten_result = await rewrite_article(text_en)

    # Log the complete rewrite result.
    log.info(f"Resultado completo da reescrita: {json.dumps(rewritten_result, indent=2)}")

    # Update the database only when the rewrite succeeded.
    if rewritten_result and rewritten_result.get("success") and rewritten_result.get("data"):
        await update_news_rewrite(news_id, rewritten_result["data"])

    # Generate the poster information.
    poster_analysis = await generate_poster_analysis(news_data, rewritten_result)

    # Assemble the final result.
    result = {
        "news_id": news_id,
        "title_en": title_en,
        "text_en": text_en,
        "rewrite_result": rewritten_result,
        "death_related": news_data.get("death_related", False),
        "entity_name": news_data.get("entity_name", ""),
        "entity_type": news_data.get("entity_type", ""),
        "image": news_data.get("image", "")
    }

    # Add the poster information when available.
    if poster_analysis:
        result.update(poster_analysis)

    return result

# Register the router's routes on the application.
app.include_router(router)

@app.on_event("shutdown")
async def shutdown_event():
    """Close the shared aiohttp session on application shutdown, if one exists."""
    global http_session
    if not http_session:
        # No session was ever created: nothing to close.
        return
    await http_session.close()