understanding committed on
Commit
c3ebde1
·
verified ·
1 Parent(s): 13f32c8

Update terabox_utils.py

Browse files
Files changed (1) hide show
  1. terabox_utils.py +130 -68
terabox_utils.py CHANGED
@@ -1,87 +1,149 @@
1
  # terabox_utils.py
2
 
3
  import re
4
- import aiohttp
5
- import os
6
  import asyncio
 
7
  import logging
8
- import uuid
 
 
 
 
9
 
10
- logger = logging.getLogger(__name__)
11
 
12
- # List of supported domains
13
- TERABOX_DOMAINS = [
14
- "terabox.com",
15
- "teraboxapp.com",
16
- "terasharelink.com",
17
- "1024tera.com",
18
- "freeterabox.com",
19
- "4funbox.com",
20
- "box-links.com",
21
- ]
22
-
23
- # --- Extract short_id ---
24
- async def extract_terabox_short_id(url: str) -> str | None:
25
- pattern = re.compile(
26
- r"https?://(?:"
27
- + "|".join(re.escape(domain) for domain in TERABOX_DOMAINS)
28
- + r")/s/([a-zA-Z0-9_-]+)"
29
- )
30
- match = pattern.search(url)
31
- if match:
32
- return match.group(1)
 
 
 
 
 
 
33
  return None
34
 
35
- # --- Get direct URL and filename ---
36
- async def get_final_url_and_filename(original_link: str) -> tuple[str | None, str | None, str | None]:
37
- try:
38
- async with aiohttp.ClientSession() as session:
39
- async with session.get(original_link, allow_redirects=True) as resp:
40
- if resp.status != 200:
41
- return None, None, f"Failed to access link: HTTP {resp.status}"
42
- html = await resp.text()
43
-
44
- # Try to extract filename and direct URL (depends on Terabox's HTML structure — needs to be robust!)
45
- file_name_match = re.search(r'"file_name":"(.*?)"', html)
46
- dlink_match = re.search(r'"dlink":"(https:[^"]+)"', html)
47
-
48
- if not dlink_match:
49
- return None, None, "Failed to extract direct link (dlink) from Terabox page."
50
 
51
- download_url = dlink_match.group(1).encode('utf-8').decode('unicode_escape')
 
 
 
 
 
 
 
52
 
53
- raw_filename = "file_from_terabox_" + str(uuid.uuid4()) if not file_name_match else file_name_match.group(1)
 
54
 
55
- logger.info(f"Resolved direct URL: {download_url}, filename: {raw_filename}")
 
56
 
57
- return download_url, raw_filename, None
58
 
59
  except Exception as e:
60
- logger.exception("Error in get_final_url_and_filename()")
61
- return None, None, str(e)
62
-
63
- # --- Download file ---
64
- async def download_terabox_file(bot, chat_id: int, status_message_id: int, download_url: str, raw_filename: str) -> tuple[str | None, str | None, str | None]:
65
- try:
66
- local_filepath = os.path.join("downloads", raw_filename)
67
 
68
- async with aiohttp.ClientSession() as session:
69
- async with session.get(download_url) as resp:
70
- if resp.status != 200:
71
- return None, None, f"Failed to download file: HTTP {resp.status}"
72
 
73
- with open(local_filepath, "wb") as f:
74
- while True:
75
- chunk = await resp.content.read(1024 * 1024)
76
- if not chunk:
77
- break
78
- f.write(chunk)
79
-
80
- # You may implement optional fast thumbnail here — for now we'll just skip thumb
81
- thumb_path = None
82
-
83
- return local_filepath, thumb_path, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  except Exception as e:
86
- logger.exception("Error in download_terabox_file()")
87
- return None, None, str(e)
 
 
1
  # terabox_utils.py
2
 
3
  import re
4
+ import requests
 
5
  import asyncio
6
+ from functools import partial
7
  import logging
8
+ import os
9
+ import time
10
+ import math
11
+ import subprocess
12
+ from typing import Optional, Tuple
13
 
14
+ import config
15
 
16
+ logger = logging.getLogger(__name__)
17
+ os.makedirs("downloads", exist_ok=True)
18
+
19
+ # --- Utility Functions ---
20
def format_bytes(size_bytes: int) -> str:
    """Render a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Number of bytes. Zero and negative values render as "0 B".

    Returns:
        A string such as "1.5 KB": the value rounded to two decimals followed
        by one of B, KB, MB, GB, TB. Values beyond the TB range stay in TB.
    """
    if size_bytes <= 0:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB")
    last = len(size_name) - 1
    # Each unit is 2**10 times the previous one, so the integer bit length
    # selects the unit exactly; a floating-point log can land a hair below
    # or above an exact power of 1024.
    i = max(0, min((int(size_bytes).bit_length() - 1) // 10, last))
    s = round(size_bytes / 1024 ** i, 2)
    # Rounding can push a value just under a boundary up to 1024.0 of the
    # smaller unit; show it as 1.0 of the next unit instead.
    if s >= 1024 and i < last:
        i += 1
        s = round(size_bytes / 1024 ** i, 2)
    return f"{s} {size_name[i]}"
28
+
29
# Compiled once at import time. Order matters: the first pattern in this
# tuple that matches anywhere in the URL wins.
_TERABOX_SHORT_ID_PATTERNS = tuple(
    re.compile(pattern, re.I)
    for pattern in (
        r'terabox\.com/s/([a-zA-Z0-9_-]+)',
        r'teraboxapp\.com/s/([a-zA-Z0-9_-]+)',
        r'1024tera\.com/s/([a-zA-Z0-9_-]+)',
        r'freeterabox\.com/s/([a-zA-Z0-9_-]+)',
        r'terabox\.com/sharing/link\?surl=([a-zA-Z0-9_-]+)',
        r'terasharelink\.com/s/([a-zA-Z0-9_-]+)',
        r'4funbox\.com/s/([a-zA-Z0-9_-]+)',
        r'box-links\.com/s/([a-zA-Z0-9_-]+)',
    )
)


async def extract_terabox_short_id(full_url: str) -> Optional[str]:
    """Extract the share short-id from a supported Terabox-style URL.

    Args:
        full_url: Text containing a share link, either ``<domain>/s/<id>`` or
            ``terabox.com/sharing/link?surl=<id>``. Matching is
            case-insensitive and may occur anywhere in the string.

    Returns:
        The captured id, or ``None`` when no pattern matches or the input is
        not a string.
    """
    # Callers may hand over None or another non-text value (e.g. a message
    # without text); treat that as "no link" instead of raising TypeError.
    if not isinstance(full_url, str):
        return None
    for pattern in _TERABOX_SHORT_ID_PATTERNS:
        if m := pattern.search(full_url):
            return m.group(1)
    return None
44
 
45
+ # --- Main Link Extraction ---
46
async def get_final_url_and_filename(original_link: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Resolve a share link to a download URL and file name via the worker API.

    POSTs ``{"link": original_link}`` as JSON to ``config.TERABOX_WORKER_URL``
    and reads ``proxy_url`` and ``file_name`` from the JSON reply. The blocking
    ``requests`` call runs in the default executor so the event loop stays free.

    Args:
        original_link: The share link to resolve.

    Returns:
        ``(download_url, file_name, None)`` on success, otherwise
        ``(None, None, error_message)``. On failure the error message is
        always a non-empty string.
    """
    payload = {"link": original_link}
    headers = {"User-Agent": "Mozilla/5.0"}
    incomplete_msg = "Worker returned incomplete data."

    try:
        loop = asyncio.get_running_loop()
        r = await loop.run_in_executor(
            None,
            partial(requests.post, config.TERABOX_WORKER_URL, headers=headers, json=payload, timeout=30)
        )
        r.raise_for_status()
        data = r.json()

        # A JSON array/scalar has no .get(); report it as incomplete data
        # rather than leaking an AttributeError message to the user.
        if not isinstance(data, dict):
            return None, None, incomplete_msg

        dl = data.get("proxy_url")
        fn = data.get("file_name")
        error = data.get("error")

        if error or not dl or not fn:
            # `error` may be present but null/empty; fall back to the generic
            # message so a failure never comes back as (None, None, None).
            return None, None, (str(error) if error else incomplete_msg)

        return dl, fn, None

    except Exception as e:
        logger.error(f"[Worker] Failed for {original_link} → {str(e)}")
        return None, None, f"Worker API failed: {str(e)}"
 
 
 
 
 
71
 
72
+ # --- Downloader ---
73
def _remove_quietly(path):
    """Delete *path* if it exists; log, but never raise, on failure."""
    try:
        if os.path.exists(path):
            os.remove(path)
    except OSError:
        logger.warning("Could not remove %s", path, exc_info=True)


async def download_terabox_file(bot_instance, chat_id, msg_id, url, filename):
    """Download *url* into ``downloads/`` and optionally convert it to MP4.

    All blocking work (the HTTP request, reading each chunk, and the ffmpeg
    subprocesses) runs in the default executor so the event loop is not
    stalled while a large file is transferred or transcoded.

    Args:
        bot_instance: Object exposing an awaitable
            ``edit_message_text(chat_id, msg_id, text, parse_mode=...)``;
            used for best-effort progress updates.
        chat_id: Chat identifier; also used as a prefix of the local file name.
        msg_id: Identifier of the status message that is edited with progress.
        url: Direct download URL.
        filename: Original file name; characters ``\\ / * ? : " < > |`` are
            replaced with ``_`` and the result is truncated to 200 characters.

    Returns:
        ``(path, thumb_path, None)`` on success, where ``thumb_path`` is
        ``None`` unless ``config.ENABLE_FFMPEG_CONVERT`` is truthy, or
        ``(None, None, error_message)`` on any failure. On failure every file
        this call created is removed.
    """
    safe_fn = re.sub(r'[\\/*?:"<>|]', "_", filename)[:200]
    raw_download_path = os.path.join("downloads", f"{chat_id}_{time.time()}_{safe_fn}")
    # Every path this call may have created; all are removed on failure.
    created_paths = [raw_download_path]
    response = None

    try:
        loop = asyncio.get_running_loop()

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Accept": "*/*",
            "Referer": "https://teraboxapp.com/"
        }

        response = await loop.run_in_executor(
            None,
            partial(requests.get, url, headers=headers, stream=True, timeout=(10, 300), allow_redirects=True)
        )
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))
        dl_size = 0
        # Monotonic clock: progress throttling must not be affected by
        # wall-clock adjustments.
        last_update = time.monotonic()
        chunk_iter = response.iter_content(chunk_size=1024 * 1024)

        with open(raw_download_path, 'wb') as f:
            while True:
                # next(..., None) blocks on the socket, so it runs in a worker
                # thread; None marks the end of the stream.
                chunk = await loop.run_in_executor(None, next, chunk_iter, None)
                if chunk is None:
                    break
                if chunk:
                    f.write(chunk)
                    dl_size += len(chunk)

                if time.monotonic() - last_update > 2.5:
                    pct = (dl_size / total_size * 100) if total_size > 0 else 0
                    prog_text = (
                        f"📥 **Downloading:** `{filename}`\n"
                        f"Progress: {format_bytes(dl_size)}/{format_bytes(total_size)} ({pct:.1f}%)"
                    )
                    try:
                        await bot_instance.edit_message_text(
                            chat_id, msg_id, prog_text, parse_mode="Markdown"
                        )
                    except Exception:
                        # Progress display is best-effort; a failed edit must
                        # not abort the download.
                        logger.debug("Progress update failed", exc_info=True)

                    last_update = time.monotonic()

        if not config.ENABLE_FFMPEG_CONVERT:
            # No conversion requested: hand back the raw download.
            return raw_download_path, None, None

        new_filename = os.path.splitext(safe_fn)[0] + "_converted.mp4"
        converted_path = os.path.join("downloads", f"{chat_id}_{time.time()}_{new_filename}")
        thumb_path = os.path.join("downloads", f"{chat_id}_{time.time()}_thumb.jpg")
        created_paths.extend([converted_path, thumb_path])

        # Convert to mp4
        ffmpeg_convert_cmd = [
            "ffmpeg", "-y", "-i", raw_download_path,
            "-c:v", "libx264", "-preset", "fast", "-c:a", "aac",
            "-movflags", "+faststart", converted_path
        ]
        await loop.run_in_executor(None, partial(subprocess.run, ffmpeg_convert_cmd, check=True))

        # Generate thumbnail from the converted file
        ffmpeg_thumb_cmd = [
            "ffmpeg", "-y", "-i", converted_path,
            "-ss", "00:00:01.000", "-vframes", "1", thumb_path
        ]
        await loop.run_in_executor(None, partial(subprocess.run, ffmpeg_thumb_cmd, check=True))

        # The raw download is no longer needed once conversion succeeded.
        os.remove(raw_download_path)
        return converted_path, thumb_path, None

    except Exception as e:
        logger.exception("[Download] Failed for %s", url)
        for path in created_paths:
            _remove_quietly(path)
        return None, None, str(e)

    finally:
        # A streamed response keeps its connection open until closed.
        if response is not None:
            response.close()