whisper-tg

Paused

App Files Community

muhtasham committed on Mar 21

Commit

cc96a73

1 Parent(s): 478eee2

WIP

Browse files

Files changed (2) hide show

app.py +3 -147
requirements.txt +0 -2

app.py CHANGED Viewed

@@ -1,19 +1,14 @@
 import spaces
 import torch
 import gradio as gr
-import yt_dlp as youtube_dl
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
-import tempfile
-import os
-import time
 import subprocess
 from loguru import logger
 MODEL_NAME = "muhtasham/whisper-tg"
-BATCH_SIZE = 32
 FILE_LIMIT_MB = 1000
-YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
 # Check if ffmpeg is installed
 def check_ffmpeg():
@@ -48,130 +43,10 @@ def transcribe(inputs):
         start_time = chunk["timestamp"][0]
         end_time = chunk["timestamp"][1]
         text = chunk["text"].strip()
-        timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
     return result["text"], "\n".join(timestamps)
-def _return_yt_html_embed(yt_url):
-    try:
-        video_id = yt_url.split("?v=")[-1]
-        HTML_str = (
-            f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-            " </center>"
-        )
-        return HTML_str
-    except Exception as e:
-        logger.error(f"Error creating embed HTML: {str(e)}")
-        raise gr.Error("Invalid YouTube URL format")
-def download_yt_audio(yt_url, filename):
-    logger.info(f"Starting download for URL: {yt_url}")
-    # Configure yt-dlp options with anti-bot detection measures
-    ydl_opts = {
-        "format": "bestaudio/best",
-        "postprocessors": [{
-            "key": "FFmpegExtractAudio",
-            "preferredcodec": "mp3",
-            "preferredquality": "192",
-        }],
-        "outtmpl": filename,
-        "quiet": True,
-        "no_warnings": True,
-        "extract_flat": False,
-        "force_generic_extractor": False,
-        "nocheckcertificate": True,
-        "ignoreerrors": False,
-        "logtostderr": False,
-        "verbose": False,
-        # Anti-bot detection options
-        "cookiesfrombrowser": ("chrome",),
-        "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-        "http_headers": {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-            "Accept-Language": "en-us,en;q=0.5",
-            "Sec-Fetch-Mode": "navigate",
-        },
-        "socket_timeout": 30,
-        "retries": 10,
-        "fragment_retries": 10,
-        "file_access_retries": 10,
-        "extractor_retries": 10,
-        "ignoreerrors": False,
-        "no_warnings": True,
-        "quiet": True,
-    }
-    try:
-        # First, get video info without downloading
-        with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
-            logger.info("Extracting video information...")
-            info = ydl.extract_info(yt_url, download=False)
-            # Check video duration
-            file_length = info.get("duration_string", "0:00:00")
-            file_h_m_s = file_length.split(":")
-            file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
-            if len(file_h_m_s) == 1:
-                file_h_m_s.insert(0, 0)
-            if len(file_h_m_s) == 2:
-                file_h_m_s.insert(0, 0)
-            file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
-            if file_length_s > YT_LENGTH_LIMIT_S:
-                yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
-                file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
-                raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
-            # Check if video is age-restricted or private
-            if info.get("age_limit") or info.get("is_private"):
-                raise gr.Error("This video is age-restricted or private and cannot be processed.")
-            logger.info("Video information extracted successfully")
-        # Now download the audio
-        logger.info("Starting audio download...")
-        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([yt_url])
-        logger.info("Audio download completed successfully")
-    except youtube_dl.utils.DownloadError as err:
-        logger.error(f"Download error: {str(err)}")
-        raise gr.Error(f"Failed to download video: {str(err)}")
-    except youtube_dl.utils.ExtractorError as err:
-        logger.error(f"Extraction error: {str(err)}")
-        raise gr.Error(f"Failed to extract video information: {str(err)}")
-    except Exception as e:
-        logger.error(f"Unexpected error: {str(e)}")
-        raise gr.Error(f"An unexpected error occurred: {str(e)}")
-@spaces.GPU
-def yt_transcribe(yt_url):
-    html_embed_str = _return_yt_html_embed(yt_url)
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        filepath = os.path.join(tmpdirname, "video.mp4")
-        download_yt_audio(yt_url, filepath)
-        with open(filepath, "rb") as f:
-            inputs = f.read()
-    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
-    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-    result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
-    # Format timestamps with text
-    timestamps = []
-    for chunk in result["chunks"]:
-        start_time = chunk["timestamp"][0]
-        end_time = chunk["timestamp"][1]
-        text = chunk["text"].strip()
-        timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
-    return html_embed_str, result["text"], "\n".join(timestamps)
 demo = gr.Blocks(theme=gr.themes.Ocean())
 mf_transcribe = gr.Interface(
@@ -210,27 +85,8 @@ file_transcribe = gr.Interface(
     allow_flagging="never",
 )
-yt_transcribe = gr.Interface(
-    fn=yt_transcribe,
-    inputs=[
-        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-    ],
-    outputs=[
-        gr.HTML(label="Video"),
-        gr.Textbox(label="Transcription", lines=10),
-        gr.Textbox(label="Timestamps", lines=10),
-    ],
-    title="Whisper Large V3: Transcribe YouTube",
-    description=(
-        "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
-        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
-        " arbitrary length."
-    ),
-    allow_flagging="never",
-)
 with demo:
-    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
 demo.queue().launch(ssr_mode=False)

 import spaces
 import torch
 import gradio as gr
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import subprocess
 from loguru import logger
 MODEL_NAME = "muhtasham/whisper-tg"
+BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 # Check if ffmpeg is installed
 def check_ffmpeg():
         start_time = chunk["timestamp"][0]
         end_time = chunk["timestamp"][1]
         text = chunk["text"].strip()
+        timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text} \n \n")
     return result["text"], "\n".join(timestamps)
 demo = gr.Blocks(theme=gr.themes.Ocean())
 mf_transcribe = gr.Interface(
     allow_flagging="never",
 )
 with demo:
+    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
 demo.queue().launch(ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -1,4 +1,2 @@
 transformers
-yt-dlp
 loguru
-browser-cookie3


1	transformers

2	loguru