Spaces:

understanding
/

tebrox

Sleeping

App Files Files Community

understanding committed on Jun 11

Commit

c3ebde1

verified ·

1 Parent(s): 13f32c8

Update terabox_utils.py

Browse files

Files changed (1) hide show

terabox_utils.py +130 -68

terabox_utils.py CHANGED Viewed

@@ -1,87 +1,149 @@
 # terabox_utils.py
 import re
-import aiohttp
-import os
 import asyncio
 import logging
-import uuid
-logger = logging.getLogger(__name__)
-# List of supported domains
-TERABOX_DOMAINS = [
-    "terabox.com",
-    "teraboxapp.com",
-    "terasharelink.com",
-    "1024tera.com",
-    "freeterabox.com",
-    "4funbox.com",
-    "box-links.com",
-]
-# --- Extract short_id ---
-async def extract_terabox_short_id(url: str) -> str | None:
-    pattern = re.compile(
-        r"https?://(?:"
-        + "|".join(re.escape(domain) for domain in TERABOX_DOMAINS)
-        + r")/s/([a-zA-Z0-9_-]+)"
-    )
-    match = pattern.search(url)
-    if match:
-        return match.group(1)
     return None
-# --- Get direct URL and filename ---
-async def get_final_url_and_filename(original_link: str) -> tuple[str | None, str | None, str | None]:
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(original_link, allow_redirects=True) as resp:
-                if resp.status != 200:
-                    return None, None, f"Failed to access link: HTTP {resp.status}"
-                html = await resp.text()
-        # Try to extract filename and direct URL (depends on Terabox's HTML structure — needs to be robust!)
-        file_name_match = re.search(r'"file_name":"(.*?)"', html)
-        dlink_match = re.search(r'"dlink":"(https:[^"]+)"', html)
-        if not dlink_match:
-            return None, None, "Failed to extract direct link (dlink) from Terabox page."
-        download_url = dlink_match.group(1).encode('utf-8').decode('unicode_escape')
-        raw_filename = "file_from_terabox_" + str(uuid.uuid4()) if not file_name_match else file_name_match.group(1)
-        logger.info(f"Resolved direct URL: {download_url}, filename: {raw_filename}")
-        return download_url, raw_filename, None
     except Exception as e:
-        logger.exception("Error in get_final_url_and_filename()")
-        return None, None, str(e)
-# --- Download file ---
-async def download_terabox_file(bot, chat_id: int, status_message_id: int, download_url: str, raw_filename: str) -> tuple[str | None, str | None, str | None]:
-    try:
-        local_filepath = os.path.join("downloads", raw_filename)
-        async with aiohttp.ClientSession() as session:
-            async with session.get(download_url) as resp:
-                if resp.status != 200:
-                    return None, None, f"Failed to download file: HTTP {resp.status}"
-                with open(local_filepath, "wb") as f:
-                    while True:
-                        chunk = await resp.content.read(1024 * 1024)
-                        if not chunk:
-                            break
-                        f.write(chunk)
-        # You may implement optional fast thumbnail here — for now we'll just skip thumb
-        thumb_path = None
-        return local_filepath, thumb_path, None
     except Exception as e:
-        logger.exception("Error in download_terabox_file()")
-        return None, None, str(e)

 # terabox_utils.py
 import re
+import requests
 import asyncio
+from functools import partial
 import logging
+import os
+import time
+import math
+import subprocess
+from typing import Optional, Tuple
+import config
+logger = logging.getLogger(__name__)
+os.makedirs("downloads", exist_ok=True)
+# --- Utility Functions ---
def format_bytes(size_bytes: int) -> str:
    """Render a byte count as a human-readable string using 1024-based units.

    The value is scaled to the largest unit from B to TB that keeps the
    displayed number below 1024 and is rounded to two decimals, e.g.
    ``1536`` -> ``"1.5 KB"``.  Sizes beyond the TB range stay in TB.

    Args:
        size_bytes: Number of bytes; zero or negative values yield ``"0 B"``.

    Returns:
        The formatted size, for example ``"1.0 GB"``.
    """
    if size_bytes <= 0:
        return "0 B"
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size_bytes)
    unit_index = 0
    # Scale by exact division rather than a floating-point logarithm, and
    # compare the *rounded* value so that sizes just under a boundary (for
    # example 1048575 bytes) are promoted to "1.0 MB" instead of being
    # printed as "1024.0 KB".
    while unit_index < len(units) - 1 and round(value, 2) >= 1024:
        value /= 1024
        unit_index += 1
    return f"{round(value, 2)} {units[unit_index]}"
async def extract_terabox_short_id(full_url: str) -> Optional[str]:
    """Pull the share identifier out of a TeraBox-style share URL.

    Each known URL shape is searched for in order, case-insensitively and
    anywhere inside ``full_url``; the captured ID of the first shape that
    matches is returned.

    Args:
        full_url: Text containing a share link.

    Returns:
        The short ID (letters, digits, ``_`` and ``-``), or ``None`` when no
        known URL shape is present.
    """
    share_link_shapes = (
        r'terabox\.com/s/([a-zA-Z0-9_-]+)',
        r'teraboxapp\.com/s/([a-zA-Z0-9_-]+)',
        r'1024tera\.com/s/([a-zA-Z0-9_-]+)',
        r'freeterabox\.com/s/([a-zA-Z0-9_-]+)',
        r'terabox\.com/sharing/link\?surl=([a-zA-Z0-9_-]+)',
        r'terasharelink\.com/s/([a-zA-Z0-9_-]+)',
        r'4funbox\.com/s/([a-zA-Z0-9_-]+)',
        r'box-links\.com/s/([a-zA-Z0-9_-]+)',
    )
    # The generator is lazy, so searching stops at the first shape that hits.
    attempts = (re.search(shape, full_url, re.I) for shape in share_link_shapes)
    first_hit = next(filter(None, attempts), None)
    return first_hit.group(1) if first_hit else None
+# --- Main Link Extraction ---
async def get_final_url_and_filename(original_link: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Resolve a share link to a download URL and file name via the worker API.

    POSTs ``{"link": original_link}`` as JSON to ``config.TERABOX_WORKER_URL``
    and reads ``proxy_url`` and ``file_name`` from the JSON reply.  The
    blocking ``requests`` call runs in the default executor so the event loop
    is not stalled while waiting for the worker.

    Args:
        original_link: The share link to resolve.

    Returns:
        ``(download_url, file_name, None)`` on success, otherwise
        ``(None, None, error_message)``.  ``error_message`` is always a
        non-empty string, so callers can rely on it to detect failure.
    """
    payload = {"link": original_link}
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        loop = asyncio.get_running_loop()
        r = await loop.run_in_executor(
            None,
            partial(requests.post, config.TERABOX_WORKER_URL, headers=headers, json=payload, timeout=30)
        )
        r.raise_for_status()
        data = r.json()
        dl = data.get("proxy_url")
        fn = data.get("file_name")
        if data.get("error") or not dl or not fn:
            # `dict.get(key, default)` only falls back when the key is absent,
            # so a reply such as {"error": null} used to produce a None
            # message and an all-None result.  Fall back on any falsy value.
            return None, None, str(data.get("error") or "Worker returned incomplete data.")
        return dl, fn, None
    except Exception as e:
        # Covers network errors, non-2xx statuses, invalid JSON and replies
        # that are not JSON objects; all are reported through the error slot.
        logger.error("[Worker] Failed for %s → %s", original_link, e)
        return None, None, f"Worker API failed: {e}"
+# --- Downloader ---
async def download_terabox_file(bot_instance, chat_id, msg_id, url, filename):
    """Download ``url`` into ``downloads/`` while reporting progress in the chat.

    The response body is streamed to disk in 1 MiB chunks.  About every 2.5
    seconds the message ``msg_id`` in ``chat_id`` is edited with the current
    progress.  When ``config.ENABLE_FFMPEG_CONVERT`` is true the download is
    re-encoded to MP4 with ffmpeg, a thumbnail is grabbed from the converted
    file, and the raw download is deleted.

    All blocking work (HTTP reads and the ffmpeg runs) is handed to the
    default executor so the event loop keeps serving other tasks.

    Args:
        bot_instance: Object exposing an awaitable ``edit_message_text``.
        chat_id: Chat whose status message is updated; also used in file names.
        msg_id: Identifier of the status message to edit.
        url: Direct download URL.
        filename: Original file name, shown to the user and sanitised for disk.

    Returns:
        ``(file_path, thumb_path, None)`` on success, where ``thumb_path`` is
        ``None`` when conversion is disabled; ``(None, None, error_message)``
        on any failure, after removing the files created by this call.
    """
    safe_fn = re.sub(r'[\\/*?:"<>|]', "_", filename)[:200]
    raw_download_path = os.path.join("downloads", f"{chat_id}_{time.time()}_{safe_fn}")
    # Every file this call may create; anything still present is removed if
    # the download or conversion fails part-way through.
    created_paths = [raw_download_path]
    try:
        loop = asyncio.get_running_loop()
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Accept": "*/*",
            "Referer": "https://teraboxapp.com/"
        }
        r = await loop.run_in_executor(
            None,
            partial(requests.get, url, headers=headers, stream=True, timeout=(10, 300), allow_redirects=True)
        )
        try:
            r.raise_for_status()
            total_size = int(r.headers.get('content-length', 0))
            dl_size = 0
            last_update = time.time()
            chunks = r.iter_content(chunk_size=1024 * 1024)
            with open(raw_download_path, 'wb') as f:
                while True:
                    # Each network read happens in a worker thread; reading
                    # the stream directly here would freeze the event loop
                    # for the whole download.
                    chunk = await loop.run_in_executor(None, next, chunks, None)
                    if chunk is None:
                        break
                    if not chunk:
                        continue
                    f.write(chunk)
                    dl_size += len(chunk)
                    if time.time() - last_update > 2.5:
                        pct = (dl_size / total_size * 100) if total_size > 0 else 0
                        prog_text = (
                            f"📥 **Downloading:** `{filename}`\n"
                            f"Progress: {format_bytes(dl_size)}/{format_bytes(total_size)} ({pct:.1f}%)"
                        )
                        try:
                            await bot_instance.edit_message_text(
                                chat_id, msg_id, prog_text, parse_mode="Markdown"
                            )
                        except Exception:
                            # Progress updates are best-effort: a failed edit
                            # must never abort the download itself.
                            pass
                        last_update = time.time()
        finally:
            # A streamed response holds its connection until explicitly closed.
            r.close()
        if not config.ENABLE_FFMPEG_CONVERT:
            # No conversion requested: hand back the raw download.
            return raw_download_path, None, None
        new_filename = os.path.splitext(safe_fn)[0] + "_converted.mp4"
        converted_path = os.path.join("downloads", f"{chat_id}_{time.time()}_{new_filename}")
        thumb_path = os.path.join("downloads", f"{chat_id}_{time.time()}_thumb.jpg")
        created_paths += [converted_path, thumb_path]
        # Re-encode to H.264/AAC MP4 with the moov atom moved to the front.
        ffmpeg_convert_cmd = [
            "ffmpeg", "-y", "-i", raw_download_path,
            "-c:v", "libx264", "-preset", "fast", "-c:a", "aac",
            "-movflags", "+faststart", converted_path
        ]
        await loop.run_in_executor(None, partial(subprocess.run, ffmpeg_convert_cmd, check=True))
        # Grab a single frame one second in to serve as the thumbnail.
        ffmpeg_thumb_cmd = [
            "ffmpeg", "-y", "-i", converted_path,
            "-ss", "00:00:01.000", "-vframes", "1", thumb_path
        ]
        await loop.run_in_executor(None, partial(subprocess.run, ffmpeg_thumb_cmd, check=True))
        # The raw download is no longer needed once conversion succeeded.
        os.remove(raw_download_path)
        return converted_path, thumb_path, None
    except Exception as e:
        for leftover in created_paths:
            if os.path.exists(leftover):
                os.remove(leftover)
        return None, None, str(e)