habulaj committed on
Commit
f83678a
·
verified ·
1 Parent(s): fbd0ae7

Update routers/searchterm.py

Browse files
Files changed (1) hide show
  1. routers/searchterm.py +102 -1
routers/searchterm.py CHANGED
@@ -5,10 +5,16 @@ import asyncio
5
  import httpx
6
  import aiohttp
7
  import trafilatura
 
 
 
 
8
  from urllib.parse import urlparse
9
  from typing import List, Dict, Any, Optional
10
  from fastapi import APIRouter, HTTPException, Body
 
11
  from newspaper import Article
 
12
 
13
  router = APIRouter()
14
 
@@ -34,6 +40,13 @@ BLOCKED_DOMAINS = {"reddit.com", "www.reddit.com", "old.reddit.com",
34
 
35
  MAX_TEXT_LENGTH = 4000
36
 
 
 
 
 
 
 
 
37
 
38
  def is_blocked_domain(url: str) -> bool:
39
  try:
@@ -60,6 +73,44 @@ def get_realistic_headers() -> Dict[str, str]:
60
  }
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  async def search_brave_term(client: httpx.AsyncClient, term: str) -> List[Dict[str, str]]:
64
  params = {"q": term, "count": 10, "safesearch": "off", "summary": "false"}
65
 
@@ -180,4 +231,54 @@ async def search_terms(payload: Dict[str, List[str]] = Body(...)) -> Dict[str, A
180
  else:
181
  final_results = []
182
 
183
- return {"results": final_results}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import httpx
6
  import aiohttp
7
  import trafilatura
8
+ import json
9
+ import uuid
10
+ import time
11
+ from pathlib import Path
12
  from urllib.parse import urlparse
13
  from typing import List, Dict, Any, Optional
14
  from fastapi import APIRouter, HTTPException, Body
15
+ from fastapi.responses import FileResponse
16
  from newspaper import Article
17
+ from threading import Timer
18
 
19
  router = APIRouter()
20
 
 
40
 
41
  MAX_TEXT_LENGTH = 4000
42
 
43
# Directory where the temporary export files are written.
TEMP_DIR = Path("/tmp")
TEMP_DIR.mkdir(exist_ok=True)

# In-memory registry of temporary files, keyed by file id.
temp_files: Dict[str, Dict[str, Any]] = {}
49
+
50
 
51
  def is_blocked_domain(url: str) -> bool:
52
  try:
 
73
  }
74
 
75
 
76
def delete_temp_file(file_id: str, file_path: Path):
    """Delete an expired temporary file and drop its registry entry.

    Used as the ``threading.Timer`` callback that expires a temporary file.

    Args:
        file_id: Key of the file in the module-level ``temp_files`` registry.
        file_path: Location of the file on disk.

    The registry entry is removed even when the file cannot be deleted, so
    an expired file is never reachable through the registry afterwards.
    Filesystem errors are reported on stdout and not re-raised, because the
    function runs on a timer thread where nobody can handle them.
    """
    try:
        try:
            # EAFP: avoids the exists()/unlink() check-then-act race.
            file_path.unlink()
        except FileNotFoundError:
            # Already gone: the goal (no file on disk) is met.
            pass
        print(f"Arquivo temporário removido: {file_path}")
    except OSError as e:
        print(f"Erro ao remover arquivo temporário: {e}")
    finally:
        # Always forget the entry; otherwise a failed unlink would leave the
        # file registered (and downloadable) past its expiry.
        temp_files.pop(file_id, None)
85
+
86
+
87
def create_temp_file(data: Dict[str, Any]) -> Dict[str, Any]:
    """Write ``data`` as JSON to a temporary file and schedule its removal.

    Args:
        data: JSON-serializable payload to persist.

    Returns:
        A dict with the generated ``file_id``, the relative ``download_url``
        and ``expires_in_hours`` (an int, hence the ``Dict[str, Any]``
        return type).

    Raises:
        TypeError, ValueError: If ``data`` is not JSON-serializable.
        OSError: If the file cannot be written.

    On any write failure the partially written file is removed before the
    exception propagates, so no untracked file is left behind.
    """
    file_id = str(uuid.uuid4())
    file_path = TEMP_DIR / f"fontes_{file_id}.txt"

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except Exception:
        # json.dump streams its output, so a failure midway leaves a
        # truncated file that nothing would ever clean up.
        try:
            file_path.unlink()
        except OSError:
            pass
        raise

    # Schedule removal in 24 hours (86400 seconds).
    timer = Timer(86400, delete_temp_file, args=[file_id, file_path])
    # Daemon thread: a pending timer must not keep the interpreter alive
    # for up to 24 hours at shutdown.
    timer.daemon = True

    # Register before starting the timer so the callback always finds the
    # entry it is supposed to remove.
    temp_files[file_id] = {
        "path": file_path,
        "created_at": time.time(),
        "timer": timer,
    }
    timer.start()

    return {
        "file_id": file_id,
        "download_url": f"/download-temp/{file_id}",
        "expires_in_hours": 24,
    }
112
+
113
+
114
  async def search_brave_term(client: httpx.AsyncClient, term: str) -> List[Dict[str, str]]:
115
  params = {"q": term, "count": 10, "safesearch": "off", "summary": "false"}
116
 
 
231
  else:
232
  final_results = []
233
 
234
+ # Cria o JSON final
235
+ result_data = {"results": final_results}
236
+
237
+ # Cria arquivo temporário
238
+ temp_file_info = create_temp_file(result_data)
239
+
240
+ return {
241
+ "message": "Dados salvos em arquivo temporário",
242
+ "total_results": len(final_results),
243
+ "file_info": temp_file_info
244
+ }
245
+
246
+
247
@router.get("/download-temp/{file_id}")
async def download_temp_file(file_id: str):
    """Serve a registered temporary file as a ``fontes.txt`` attachment.

    Args:
        file_id: Identifier of the file in the ``temp_files`` registry.

    Raises:
        HTTPException: 404 when the id is not registered, or when the
            registered file no longer exists on disk (the stale registry
            entry is dropped in that case).
    """
    # Single lookup instead of "in" followed by indexing: the expiry timer
    # runs on another thread and may pop the entry between the two steps,
    # which would surface as a KeyError (HTTP 500) instead of a 404.
    try:
        file_info = temp_files[file_id]
    except KeyError:
        raise HTTPException(
            status_code=404, detail="Arquivo não encontrado ou expirado"
        ) from None

    file_path = file_info["path"]

    if not file_path.exists():
        temp_files.pop(file_id, None)
        raise HTTPException(status_code=404, detail="Arquivo não encontrado")

    return FileResponse(
        path=str(file_path),
        filename="fontes.txt",
        media_type="text/plain",
        headers={"Content-Disposition": "attachment; filename=fontes.txt"},
    )
266
+
267
+
268
@router.get("/temp-files/status")
async def get_temp_files_status():
    """Report age, remaining lifetime and on-disk presence of temp files (debug).

    Returns:
        ``{"temp_files": {file_id: {"age_hours", "remaining_hours",
        "exists"}}}`` where the hour values are rounded to two decimals and
        ``remaining_hours`` is measured against a 24-hour lifetime and never
        goes below zero.
    """
    status = {}
    current_time = time.time()

    # Iterate over a snapshot: expiry timers run on other threads and pop
    # entries from temp_files, which would otherwise raise "dictionary
    # changed size during iteration" while this loop does filesystem I/O.
    for file_id, info in list(temp_files.items()):
        age_hours = (current_time - info["created_at"]) / 3600
        remaining_hours = max(0, 24 - age_hours)

        status[file_id] = {
            "age_hours": round(age_hours, 2),
            "remaining_hours": round(remaining_hours, 2),
            "exists": info["path"].exists(),
        }

    return {"temp_files": status}