habulaj committed on
Commit 49c5f61 · verified · 1 Parent(s): 0533635

Update routers/searchterm.py

Files changed (1)
  1. routers/searchterm.py +199 -120
routers/searchterm.py CHANGED
@@ -10,13 +10,17 @@ import uuid
 import time
 from pathlib import Path
 from urllib.parse import urlparse
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Set, Tuple
 from fastapi import APIRouter, HTTPException, Body
 from fastapi.responses import FileResponse
 from newspaper import Article
 from threading import Timer
 from google import genai
 from google.genai import types
+from asyncio import Queue, create_task, gather
+from concurrent.futures import ThreadPoolExecutor
+import aiofiles
+import ujson  # JSON mais rápido
 
 router = APIRouter()
 
@@ -41,10 +45,16 @@ USER_AGENTS = [
     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
 ]
 
-BLOCKED_DOMAINS = {"reddit.com", "www.reddit.com", "old.reddit.com",
-                   "quora.com", "www.quora.com"}
+BLOCKED_DOMAINS = frozenset({  # frozenset é mais rápido para lookup
+    "reddit.com", "www.reddit.com", "old.reddit.com",
+    "quora.com", "www.quora.com"
+})
 
 MAX_TEXT_LENGTH = 4000
+MAX_CONCURRENT_SEARCHES = 30  # Aumentado
+MAX_CONCURRENT_EXTRACTIONS = 80  # Aumentado significativamente
+EXTRACTION_TIMEOUT = 8  # Reduzido
+HTTP_TIMEOUT = 10  # Reduzido
 
 # Diretório para arquivos temporários
 TEMP_DIR = Path("/tmp")
@@ -53,22 +63,30 @@ TEMP_DIR.mkdir(exist_ok=True)
 # Dicionário para controlar arquivos temporários
 temp_files = {}
 
+# Pool de threads para operações CPU-intensive
+thread_pool = ThreadPoolExecutor(max_workers=20)
+
+# Cache de domínios bloqueados para evitar verificações repetidas
+domain_cache = {}
 
 def is_blocked_domain(url: str) -> bool:
     try:
         host = urlparse(url).netloc.lower()
-        return any(host == b or host.endswith("." + b) for b in BLOCKED_DOMAINS)
+
+        # Cache lookup
+        if host in domain_cache:
+            return domain_cache[host]
+
+        is_blocked = any(host == b or host.endswith("." + b) for b in BLOCKED_DOMAINS)
+        domain_cache[host] = is_blocked
+        return is_blocked
     except Exception:
         return False
 
-
 def clamp_text(text: str) -> str:
-    if not text:
-        return ""
-    if len(text) > MAX_TEXT_LENGTH:
-        return text[:MAX_TEXT_LENGTH]
-    return text
-
+    if not text or len(text) <= MAX_TEXT_LENGTH:
+        return text
+    return text[:MAX_TEXT_LENGTH]
 
 def get_realistic_headers() -> Dict[str, str]:
     return {
@@ -76,9 +94,9 @@ def get_realistic_headers() -> Dict[str, str]:
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
         "Accept-Language": "en-US,en;q=0.7,pt-BR;q=0.6",
         "Connection": "keep-alive",
+        "Accept-Encoding": "gzip, deflate, br",
     }
 
-
 def delete_temp_file(file_id: str, file_path: Path):
     """Remove arquivo temporário após expiração"""
     try:
@@ -89,17 +107,16 @@ def delete_temp_file(file_id: str, file_path: Path):
     except Exception as e:
         print(f"Erro ao remover arquivo temporário: {e}")
 
-
-def create_temp_file(data: Dict[str, Any]) -> Dict[str, str]:
-    """Cria arquivo temporário e agenda sua remoção"""
+async def create_temp_file(data: Dict[str, Any]) -> Dict[str, str]:
+    """Cria arquivo temporário assíncrono e agenda sua remoção"""
     file_id = str(uuid.uuid4())
     file_path = TEMP_DIR / f"fontes_{file_id}.txt"
 
-    # Salva o JSON no arquivo
-    with open(file_path, 'w', encoding='utf-8') as f:
-        json.dump(data, f, ensure_ascii=False, indent=2)
+    # Salva o JSON no arquivo de forma assíncrona
+    async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
+        await f.write(ujson.dumps(data, ensure_ascii=False, indent=2))
 
-    # Agenda remoção em 24 horas (86400 segundos)
+    # Agenda remoção em 24 horas
     timer = Timer(86400, delete_temp_file, args=[file_id, file_path])
     timer.start()
 
@@ -116,7 +133,6 @@ def create_temp_file(data: Dict[str, Any]) -> Dict[str, str]:
         "expires_in_hours": 24
     }
 
-
 async def generate_search_terms(context: str) -> List[str]:
     """Gera termos de pesquisa usando o modelo Gemini"""
     try:
@@ -163,9 +179,7 @@ Retorne apenas o JSON, sem mais nenhum texto."""
         ]
 
         generate_content_config = types.GenerateContentConfig(
-            thinking_config=types.ThinkingConfig(
-                thinking_budget=0,
-            ),
+            thinking_config=types.ThinkingConfig(thinking_budget=0),
         )
 
         # Coletamos toda a resposta em stream
@@ -180,7 +194,6 @@ Retorne apenas o JSON, sem mais nenhum texto."""
 
         # Tenta extrair o JSON da resposta
         try:
-            # Remove possíveis ```json e ``` da resposta
             clean_response = full_response.strip()
             if clean_response.startswith("```json"):
                 clean_response = clean_response[7:]
@@ -188,92 +201,163 @@ Retorne apenas o JSON, sem mais nenhum texto."""
                 clean_response = clean_response[:-3]
             clean_response = clean_response.strip()
 
-            # Parse do JSON
-            response_data = json.loads(clean_response)
+            response_data = ujson.loads(clean_response)
             terms = response_data.get("terms", [])
 
-            # Validação básica
             if not isinstance(terms, list):
                 raise ValueError("Terms deve ser uma lista")
 
-            return terms[:20]  # Garante máximo de 20 termos
+            return terms[:20]
 
-        except (json.JSONDecodeError, ValueError) as e:
+        except (ujson.JSONDecodeError, ValueError) as e:
             print(f"Erro ao parsear resposta do Gemini: {e}")
-            print(f"Resposta recebida: {full_response}")
-            # Retorna uma lista vazia em caso de erro
             return []
 
     except Exception as e:
         print(f"Erro ao gerar termos de pesquisa: {str(e)}")
        return []
 
-
-async def search_brave_term(client: httpx.AsyncClient, term: str) -> List[Dict[str, str]]:
-    params = {"q": term, "count": 10, "safesearch": "off", "summary": "false"}
-
-    try:
-        resp = await client.get(BRAVE_SEARCH_URL, headers=BRAVE_HEADERS, params=params)
-        if resp.status_code != 200:
-            return []
-
-        data = resp.json()
-        results: List[Dict[str, str]] = []
-
-        if "web" in data and "results" in data["web"]:
-            for item in data["web"]["results"]:
-                url = item.get("url")
-                age = item.get("age", "Unknown")
-
-                if url and not is_blocked_domain(url):
-                    results.append({"url": url, "age": age})
-
-        return results
-    except Exception:
-        return []
-
-
-async def extract_article_text(url: str, session: aiohttp.ClientSession) -> str:
-    try:
-        art = Article(url)
-        art.config.browser_user_agent = random.choice(USER_AGENTS)
-        art.config.request_timeout = 8
-        art.config.number_threads = 1
-
-        art.download()
-        art.parse()
-        txt = (art.text or "").strip()
-        if txt and len(txt) > 100:
-            return clamp_text(txt)
-    except Exception:
-        pass
-
-    try:
-        await asyncio.sleep(random.uniform(0.1, 0.3))
-
-        headers = get_realistic_headers()
-        async with session.get(url, headers=headers, timeout=12) as resp:
-            if resp.status != 200:
-                return ""
-
-            html = await resp.text()
-
-            if re.search(r"(paywall|subscribe|metered|registration|captcha|access denied)", html, re.I):
-                return ""
-
-            extracted = trafilatura.extract(html) or ""
-            extracted = extracted.strip()
-            if extracted and len(extracted) > 100:
-                return clamp_text(extracted)
-
-    except Exception:
-        pass
-
-    return ""
-
+async def search_brave_batch(client: httpx.AsyncClient, terms: List[str]) -> List[Tuple[str, List[Dict[str, str]]]]:
+    """Busca múltiplos termos em paralelo com otimizações"""
+    semaphore = asyncio.Semaphore(MAX_CONCURRENT_SEARCHES)
+
+    async def search_single_term(term: str) -> Tuple[str, List[Dict[str, str]]]:
+        async with semaphore:
+            params = {"q": term, "count": 10, "safesearch": "off", "summary": "false"}
+
+            try:
+                resp = await client.get(BRAVE_SEARCH_URL, headers=BRAVE_HEADERS, params=params)
+                if resp.status_code != 200:
+                    return (term, [])
+
+                data = resp.json()
+                results = []
+
+                if "web" in data and "results" in data["web"]:
+                    for item in data["web"]["results"]:
+                        url = item.get("url")
+                        age = item.get("age", "Unknown")
+
+                        if url and not is_blocked_domain(url):
+                            results.append({"url": url, "age": age})
+
+                return (term, results)
+            except Exception as e:
+                print(f"Erro na busca do termo '{term}': {e}")
+                return (term, [])
+
+    # Executa todas as buscas em paralelo
+    tasks = [search_single_term(term) for term in terms]
+    return await gather(*tasks, return_exceptions=False)
+
+def extract_with_trafilatura(html: str) -> str:
+    """Extração CPU-intensive executada em thread pool"""
+    try:
+        extracted = trafilatura.extract(html)
+        return extracted.strip() if extracted else ""
+    except Exception:
+        return ""
+
+def extract_with_newspaper(url: str) -> str:
+    """Extração com newspaper executada em thread pool"""
+    try:
+        art = Article(url)
+        art.config.browser_user_agent = random.choice(USER_AGENTS)
+        art.config.request_timeout = 6
+        art.config.number_threads = 1
+        art.download()
+        art.parse()
+        return (art.text or "").strip()
+    except Exception:
+        return ""
+
+async def extract_article_text_optimized(url: str, session: aiohttp.ClientSession) -> str:
+    """Extração de artigo otimizada com paralelização de métodos"""
+
+    # Tentativa 1: Newspaper em thread pool (paralelo com download HTTP)
+    newspaper_task = asyncio.create_task(
+        asyncio.get_event_loop().run_in_executor(thread_pool, extract_with_newspaper, url)
+    )
+
+    # Tentativa 2: Download HTTP e trafilatura
+    try:
+        headers = get_realistic_headers()
+        async with session.get(url, headers=headers, timeout=EXTRACTION_TIMEOUT) as resp:
+            if resp.status != 200:
+                # Aguarda newspaper se HTTP falhou
+                newspaper_result = await newspaper_task
+                return clamp_text(newspaper_result) if newspaper_result and len(newspaper_result) > 100 else ""
+
+            html = await resp.text()
+
+            # Verifica paywall rapidamente
+            if re.search(r"(paywall|subscribe|metered|registration|captcha|access denied)",
+                         html[:2000], re.I):  # Verifica apenas o início
+                newspaper_result = await newspaper_task
+                return clamp_text(newspaper_result) if newspaper_result and len(newspaper_result) > 100 else ""
+
+            # Extração com trafilatura em thread pool
+            trafilatura_task = asyncio.create_task(
+                asyncio.get_event_loop().run_in_executor(thread_pool, extract_with_trafilatura, html)
+            )
+
+            # Aguarda ambos os métodos e pega o melhor resultado
+            newspaper_result, trafilatura_result = await gather(newspaper_task, trafilatura_task)
+
+            # Escolhe o melhor resultado
+            best_result = ""
+            if trafilatura_result and len(trafilatura_result) > 100:
+                best_result = trafilatura_result
+            elif newspaper_result and len(newspaper_result) > 100:
+                best_result = newspaper_result
+
+            return clamp_text(best_result) if best_result else ""
+
+    except Exception:
+        # Se tudo falhar, tenta pelo menos o newspaper
+        try:
+            newspaper_result = await newspaper_task
+            return clamp_text(newspaper_result) if newspaper_result and len(newspaper_result) > 100 else ""
+        except Exception:
+            return ""
+
+async def process_urls_batch(session: aiohttp.ClientSession, urls_data: List[Tuple[str, str, str]]) -> List[Dict[str, Any]]:
+    """Processa URLs em lotes otimizados"""
+    semaphore = asyncio.Semaphore(MAX_CONCURRENT_EXTRACTIONS)
+    results = []
+    used_urls: Set[str] = set()
+
+    async def process_single_url(term: str, url: str, age: str) -> Optional[Dict[str, Any]]:
+        async with semaphore:
+            if url in used_urls:
+                return None
+
+            text = await extract_article_text_optimized(url, session)
+            if text:
+                used_urls.add(url)
+                return {
+                    "term": term,
+                    "age": age,
+                    "url": url,
+                    "text": text
+                }
+            return None
+
+    # Cria todas as tasks de uma vez
+    tasks = []
+    for term, url, age in urls_data:
+        tasks.append(process_single_url(term, url, age))
+
+    # Processa tudo em paralelo
+    processed_results = await gather(*tasks, return_exceptions=True)
+
+    # Filtra resultados válidos
+    return [r for r in processed_results if r is not None and not isinstance(r, Exception)]
 
 @router.post("/search-terms")
 async def search_terms(payload: Dict[str, str] = Body(...)) -> Dict[str, Any]:
+    start_time = time.time()
+
     context = payload.get("context")
     if not context or not isinstance(context, str):
         raise HTTPException(status_code=400, detail="Campo 'context' é obrigatório e deve ser uma string.")
@@ -281,78 +365,73 @@ async def search_terms(payload: Dict[str, str] = Body(...)) -> Dict[str, Any]:
     if len(context.strip()) == 0:
         raise HTTPException(status_code=400, detail="Campo 'context' não pode estar vazio.")
 
+    print(f"Iniciando geração de termos...")
     # Gera os termos de pesquisa usando o Gemini
     terms = await generate_search_terms(context)
 
     if not terms:
         raise HTTPException(status_code=500, detail="Não foi possível gerar termos de pesquisa válidos.")
 
-    used_urls = set()
-    search_semaphore = asyncio.Semaphore(20)
-    extract_semaphore = asyncio.Semaphore(50)
+    print(f"Termos gerados em {time.time() - start_time:.2f}s. Iniciando buscas...")
 
-    async def search_with_limit(client, term):
-        async with search_semaphore:
-            return await search_brave_term(client, term)
+    # Configurações otimizadas para conexões
+    connector = aiohttp.TCPConnector(
+        limit=200,  # Aumentado
+        limit_per_host=30,  # Aumentado
+        ttl_dns_cache=300,
+        use_dns_cache=True,
+        enable_cleanup_closed=True
+    )
+    timeout = aiohttp.ClientTimeout(total=HTTP_TIMEOUT, connect=5)
 
-    async def process_term(session, term, search_results):
-        async with extract_semaphore:
-            for result in search_results:
-                url = result["url"]
-                age = result["age"]
-
-                if url in used_urls:
-                    continue
-
-                text = await extract_article_text(url, session)
-                if text:
-                    used_urls.add(url)
-                    return {
-                        "term": term,
-                        "age": age,
-                        "url": url,
-                        "text": text
-                    }
-            return None
-
-    connector = aiohttp.TCPConnector(limit=100, limit_per_host=15)
-    timeout = aiohttp.ClientTimeout(total=15)
+    # Cliente HTTP otimizado
+    http_client = httpx.AsyncClient(
+        timeout=HTTP_TIMEOUT,
+        limits=httpx.Limits(
+            max_connections=200,  # Aumentado
+            max_keepalive_connections=50  # Aumentado
+        ),
+        http2=True  # Ativa HTTP/2
+    )
 
-    async with httpx.AsyncClient(
-        timeout=15.0,
-        limits=httpx.Limits(max_connections=100, max_keepalive_connections=25)
-    ) as http_client:
+    try:
         async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
+            # Fase 1: Busca todos os termos em paralelo
+            search_results = await search_brave_batch(http_client, terms)
+            print(f"Buscas concluídas em {time.time() - start_time:.2f}s. Iniciando extrações...")
+
+            # Fase 2: Prepara dados para extração em lote
+            urls_data = []
+            for term, results in search_results:
+                for result in results:
+                    urls_data.append((term, result["url"], result["age"]))
 
-            search_tasks = [search_with_limit(http_client, term) for term in terms]
-            search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
+            print(f"Processando {len(urls_data)} URLs...")
 
-            process_tasks = []
-            for term, results in zip(terms, search_results):
-                if isinstance(results, list) and results:
-                    process_tasks.append(process_term(session, term, results))
+            # Fase 3: Processa todas as URLs em paralelo
+            final_results = await process_urls_batch(session, urls_data)
 
-            if process_tasks:
-                processed_results = await asyncio.gather(*process_tasks, return_exceptions=True)
-                final_results = [r for r in processed_results if r is not None and not isinstance(r, Exception)]
-            else:
-                final_results = []
+            print(f"Extração concluída em {time.time() - start_time:.2f}s. Salvando arquivo...")
+
+    finally:
+        await http_client.aclose()
 
-    # Cria o JSON final
+    # Fase 4: Cria arquivo temporário assíncrono
     result_data = {"results": final_results}
+    temp_file_info = await create_temp_file(result_data)
 
-    # Cria arquivo temporário
-    temp_file_info = create_temp_file(result_data)
+    total_time = time.time() - start_time
+    print(f"Processo completo em {total_time:.2f}s")
 
     return {
         "message": "Dados salvos em arquivo temporário",
         "total_results": len(final_results),
         "context": context,
         "generated_terms": terms,
-        "file_info": temp_file_info
+        "file_info": temp_file_info,
+        "processing_time": f"{total_time:.2f}s"
     }
 
-
 @router.get("/download-temp/{file_id}")
 async def download_temp_file(file_id: str):
     """Endpoint para download do arquivo temporário"""