# terabox_utils.py
"""Helpers for resolving Terabox share links and downloading the shared file."""

import asyncio
import json
import logging
import os
import re
import uuid

import aiohttp

logger = logging.getLogger(__name__)

# List of supported domains
TERABOX_DOMAINS = [
    "terabox.com",
    "teraboxapp.com",
    "terasharelink.com",
    "1024tera.com",
    "freeterabox.com",
    "4funbox.com",
    "box-links.com",
]

# Directory (relative to the current working directory) that downloads go to.
_DOWNLOAD_DIR = "downloads"

# Number of bytes requested from the response stream per read.
_CHUNK_SIZE = 1024 * 1024

# Prefix of the generated name used when no usable filename is available.
_FALLBACK_NAME_PREFIX = "file_from_terabox_"


def _fallback_filename() -> str:
    """Return a unique placeholder filename."""
    return _FALLBACK_NAME_PREFIX + str(uuid.uuid4())


def _decode_json_string(raw: str) -> str | None:
    """Decode the body of a JSON string literal (the text between the quotes).

    Returns the decoded text, or ``None`` when *raw* is not a valid JSON
    string body (for example, it contains an unknown escape sequence).
    """
    try:
        return json.loads(f'"{raw}"')
    except ValueError:
        return None


def _safe_filename(raw_filename: str) -> str:
    """Reduce a remote-supplied filename to a single, non-empty path component.

    The name comes from the remote page, so directory parts are dropped to
    keep the download inside the download directory.
    """
    name = os.path.basename(raw_filename).replace("\x00", "").strip()
    if name in ("", ".", ".."):
        return _fallback_filename()
    return name


# --- Extract short_id ---
async def extract_terabox_short_id(url: str) -> str | None:
    """Return the share id from a Terabox ``/s/<id>`` URL, or ``None``.

    Only URLs whose host is exactly one of ``TERABOX_DOMAINS`` match; a host
    with a subdomain prefix (such as ``www.``) does not. The function is a
    coroutine although it never awaits, so callers must ``await`` it.

    Args:
        url: Text that may contain a Terabox share link.

    Returns:
        The id made of letters, digits, ``_`` and ``-`` that follows ``/s/``,
        or ``None`` when *url* is empty or contains no supported link.
    """
    if not url:
        return None

    # Built on every call so that runtime changes to TERABOX_DOMAINS are
    # honoured.
    pattern = re.compile(
        r"https?://(?:"
        + "|".join(re.escape(domain) for domain in TERABOX_DOMAINS)
        + r")/s/([a-zA-Z0-9_-]+)"
    )
    match = pattern.search(url)
    if match:
        return match.group(1)
    return None


# --- Get direct URL and filename ---
async def get_final_url_and_filename(
    original_link: str,
) -> tuple[str | None, str | None, str | None]:
    """Fetch a share page and pull the direct download URL and filename from it.

    The page HTML is searched for ``"dlink":"..."`` and ``"file_name":"..."``
    fragments. This depends on Terabox's page structure and may stop working
    if that structure changes.

    Args:
        original_link: URL of the share page; redirects are followed.

    Returns:
        ``(download_url, filename, None)`` on success, or
        ``(None, None, error_message)`` on failure. When the page has no
        usable ``file_name`` a generated placeholder name is returned.
        Exceptions are logged and reported through the error element rather
        than raised.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(original_link, allow_redirects=True) as resp:
                if resp.status != 200:
                    return None, None, f"Failed to access link: HTTP {resp.status}"
                html = await resp.text()

        # Try to extract filename and direct URL (depends on Terabox's HTML
        # structure — needs to be robust!)
        file_name_match = re.search(r'"file_name":"(.*?)"', html)
        dlink_match = re.search(r'"dlink":"(https:[^"]+)"', html)

        if not dlink_match:
            return None, None, "Failed to extract direct link (dlink) from Terabox page."

        # The captured text is the body of a JSON string, so decode it as
        # JSON: that turns "\/" into "/" and "\uXXXX" into the character.
        # The plain unicode_escape codec leaves "\/" untouched and mangles
        # non-ASCII text, so it is only used when JSON decoding fails.
        raw_dlink = dlink_match.group(1)
        download_url = _decode_json_string(raw_dlink)
        if download_url is None:
            download_url = raw_dlink.encode("utf-8").decode("unicode_escape")

        raw_filename = None
        if file_name_match:
            captured_name = file_name_match.group(1)
            decoded_name = _decode_json_string(captured_name)
            # Keep the undecoded text if it is not valid JSON.
            raw_filename = captured_name if decoded_name is None else decoded_name
        if not raw_filename:
            raw_filename = _fallback_filename()

        logger.info(
            "Resolved direct URL: %s, filename: %s", download_url, raw_filename
        )
        return download_url, raw_filename, None

    except Exception as e:
        logger.exception("Error in get_final_url_and_filename()")
        return None, None, str(e)


# --- Download file ---
async def download_terabox_file(
    bot,
    chat_id: int,
    status_message_id: int,
    download_url: str,
    raw_filename: str,
) -> tuple[str | None, str | None, str | None]:
    """Download *download_url* into the ``downloads`` directory.

    Args:
        bot: Not used by this function; kept for the existing call signature.
        chat_id: Not used by this function; kept for the existing call
            signature.
        status_message_id: Not used by this function; kept for the existing
            call signature.
        download_url: Direct URL of the file to fetch.
        raw_filename: Name to store the file under. Only its final path
            component is used; an empty result is replaced by a generated
            placeholder name.

    Returns:
        ``(local_filepath, thumb_path, None)`` on success, where
        ``thumb_path`` is always ``None`` because no thumbnail is generated,
        or ``(None, None, error_message)`` on failure. Exceptions are logged
        and reported through the error element rather than raised.
    """
    # Set once the destination file has been opened, so that a failed
    # transfer removes only a file this call created or truncated.
    partial_path = None
    try:
        os.makedirs(_DOWNLOAD_DIR, exist_ok=True)
        local_filepath = os.path.join(_DOWNLOAD_DIR, _safe_filename(raw_filename))

        async with aiohttp.ClientSession() as session:
            async with session.get(download_url) as resp:
                if resp.status != 200:
                    return None, None, f"Failed to download file: HTTP {resp.status}"

                with open(local_filepath, "wb") as f:
                    partial_path = local_filepath
                    # An empty read marks the end of the response body.
                    while chunk := await resp.content.read(_CHUNK_SIZE):
                        f.write(chunk)

        # You may implement optional fast thumbnail here — for now we'll just
        # skip thumb
        thumb_path = None

        return local_filepath, thumb_path, None

    except Exception as e:
        logger.exception("Error in download_terabox_file()")
        if partial_path is not None:
            try:
                os.remove(partial_path)
            except OSError:
                logger.warning("Could not remove partial download %s", partial_path)
        return None, None, str(e)