whisper-tg

Paused

App Files Community

muhtasham committed on Mar 21

Commit

478eee2

1 Parent(s): 7b9063f

WIP

Browse files

Files changed (1) hide show

app.py +56 -41

app.py CHANGED Viewed

@@ -9,10 +9,9 @@ import os
 import time
 import subprocess
 from loguru import logger
-import browser_cookie3  # Add this import for browser cookies
 MODEL_NAME = "muhtasham/whisper-tg"
-BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
@@ -37,12 +36,21 @@ pipe = pipeline(
 )
 @spaces.GPU
-def transcribe(inputs, task):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
-    return text
 def _return_yt_html_embed(yt_url):
     try:
@@ -56,33 +64,10 @@ def _return_yt_html_embed(yt_url):
         logger.error(f"Error creating embed HTML: {str(e)}")
         raise gr.Error("Invalid YouTube URL format")
-def get_youtube_cookies():
-    """Get YouTube cookies from the browser"""
-    try:
-        # Try Chrome first
-        cookies = browser_cookie3.chrome(domain_name='.youtube.com')
-    except:
-        try:
-            # Try Firefox if Chrome fails
-            cookies = browser_cookie3.firefox(domain_name='.youtube.com')
-        except:
-            try:
-                # Try Safari if Firefox fails
-                cookies = browser_cookie3.safari(domain_name='.youtube.com')
-            except Exception as e:
-                logger.warning(f"Could not get browser cookies: {str(e)}")
-                return None
-    # Convert cookies to the format yt-dlp expects
-    return {cookie.name: cookie.value for cookie in cookies}
 def download_yt_audio(yt_url, filename):
     logger.info(f"Starting download for URL: {yt_url}")
-    # Get YouTube cookies
-    cookies = get_youtube_cookies()
-    # Configure yt-dlp options
     ydl_opts = {
         "format": "bestaudio/best",
         "postprocessors": [{
@@ -99,13 +84,25 @@ def download_yt_audio(yt_url, filename):
         "ignoreerrors": False,
         "logtostderr": False,
         "verbose": False,
     }
-    # Add cookies if available
-    if cookies:
-        ydl_opts["cookiesfrombrowser"] = ("chrome",)  # or "firefox" or "safari"
-        logger.info("Using browser cookies for YouTube authentication")
     try:
         # First, get video info without downloading
         with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
@@ -151,7 +148,7 @@ def download_yt_audio(yt_url, filename):
         raise gr.Error(f"An unexpected error occurred: {str(e)}")
 @spaces.GPU
-def yt_transcribe(yt_url, task, max_filesize=75.0):
     html_embed_str = _return_yt_html_embed(yt_url)
     with tempfile.TemporaryDirectory() as tmpdirname:
@@ -163,9 +160,17 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return html_embed_str, text
 demo = gr.Blocks(theme=gr.themes.Ocean())
@@ -174,7 +179,10 @@ mf_transcribe = gr.Interface(
     inputs=[
         gr.Audio(sources="microphone", type="filepath"),
     ],
-    outputs="text",
     title="Whisper Large V3 Turbo: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -189,7 +197,10 @@ file_transcribe = gr.Interface(
     inputs=[
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
     ],
-    outputs="text",
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -204,7 +215,11 @@ yt_transcribe = gr.Interface(
     inputs=[
         gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
     ],
-    outputs=["html", "text"],
     title="Whisper Large V3: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"

 import time
 import subprocess
 from loguru import logger
 MODEL_NAME = "muhtasham/whisper-tg"
+BATCH_SIZE = 32
 FILE_LIMIT_MB = 1000
 YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
 )
 @spaces.GPU
+def transcribe(inputs):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
+    # Format timestamps with text
+    timestamps = []
+    for chunk in result["chunks"]:
+        start_time = chunk["timestamp"][0]
+        end_time = chunk["timestamp"][1]
+        text = chunk["text"].strip()
+        timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
+    return result["text"], "\n".join(timestamps)
 def _return_yt_html_embed(yt_url):
     try:
         logger.error(f"Error creating embed HTML: {str(e)}")
         raise gr.Error("Invalid YouTube URL format")
 def download_yt_audio(yt_url, filename):
     logger.info(f"Starting download for URL: {yt_url}")
+    # Configure yt-dlp options with anti-bot detection measures
     ydl_opts = {
         "format": "bestaudio/best",
         "postprocessors": [{
         "ignoreerrors": False,
         "logtostderr": False,
         "verbose": False,
+        # Anti-bot detection options
+        "cookiesfrombrowser": ("chrome",),
+        "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "http_headers": {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "Accept-Language": "en-us,en;q=0.5",
+            "Sec-Fetch-Mode": "navigate",
+        },
+        "socket_timeout": 30,
+        "retries": 10,
+        "fragment_retries": 10,
+        "file_access_retries": 10,
+        "extractor_retries": 10,
+        "ignoreerrors": False,
+        "no_warnings": True,
+        "quiet": True,
     }
     try:
         # First, get video info without downloading
         with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
         raise gr.Error(f"An unexpected error occurred: {str(e)}")
 @spaces.GPU
+def yt_transcribe(yt_url):
     html_embed_str = _return_yt_html_embed(yt_url)
     with tempfile.TemporaryDirectory() as tmpdirname:
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
+    result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
+    # Format timestamps with text
+    timestamps = []
+    for chunk in result["chunks"]:
+        start_time = chunk["timestamp"][0]
+        end_time = chunk["timestamp"][1]
+        text = chunk["text"].strip()
+        timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
+    return html_embed_str, result["text"], "\n".join(timestamps)
 demo = gr.Blocks(theme=gr.themes.Ocean())
     inputs=[
         gr.Audio(sources="microphone", type="filepath"),
     ],
+    outputs=[
+        gr.Textbox(label="Transcription", lines=10),
+        gr.Textbox(label="Timestamps", lines=10),
+    ],
     title="Whisper Large V3 Turbo: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
     inputs=[
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
     ],
+    outputs=[
+        gr.Textbox(label="Transcription", lines=10),
+        gr.Textbox(label="Timestamps", lines=10),
+    ],
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
     inputs=[
         gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
     ],
+    outputs=[
+        gr.HTML(label="Video"),
+        gr.Textbox(label="Transcription", lines=10),
+        gr.Textbox(label="Timestamps", lines=10),
+    ],
     title="Whisper Large V3: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"