whisper-tg

Paused

App Files Files Community

muhtasham committed on Mar 21

Commit

daf7f7b

1 Parent(s): 2362603

WIP

Browse files

Files changed (2) hide show

app.py +87 -39
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,19 +1,31 @@
 import spaces
 import torch
 import gradio as gr
 import yt_dlp as youtube_dl
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import tempfile
 import os
-MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
@@ -23,54 +35,91 @@ pipe = pipeline(
     device=device,
 )
 @spaces.GPU
 def transcribe(inputs, task):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
     return  text
 def _return_yt_html_embed(yt_url):
-    video_id = yt_url.split("?v=")[-1]
-    HTML_str = (
-        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-        " </center>"
-    )
-    return HTML_str
 def download_yt_audio(yt_url, filename):
-    info_loader = youtube_dl.YoutubeDL()
-    try:
-        info = info_loader.extract_info(yt_url, download=False)
-    except youtube_dl.utils.DownloadError as err:
-        raise gr.Error(str(err))
-    file_length = info["duration_string"]
-    file_h_m_s = file_length.split(":")
-    file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
-    if len(file_h_m_s) == 1:
-        file_h_m_s.insert(0, 0)
-    if len(file_h_m_s) == 2:
-        file_h_m_s.insert(0, 0)
-    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
-    if file_length_s > YT_LENGTH_LIMIT_S:
-        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
-        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
-        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
-    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        try:
             ydl.download([yt_url])
-        except youtube_dl.utils.ExtractorError as err:
-            raise gr.Error(str(err))
 @spaces.GPU
 def yt_transcribe(yt_url, task, max_filesize=75.0):
@@ -89,7 +138,6 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
     return html_embed_str, text
 demo = gr.Blocks(theme=gr.themes.Ocean())
 mf_transcribe = gr.Interface(

 import spaces
 import torch
 import gradio as gr
 import yt_dlp as youtube_dl
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import tempfile
 import os
+import time
+import subprocess
+from loguru import logger
+MODEL_NAME = "muhtasham/whisper-tg"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
def check_ffmpeg():
    """Verify that the ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` with its output captured and discarded.

    Raises:
        gr.Error: If the executable cannot be started or exits with a
            non-zero status.
    """
    try:
        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
    except (subprocess.CalledProcessError, OSError) as err:
        # OSError rather than only FileNotFoundError: a binary that exists but
        # cannot be executed (e.g. PermissionError) should produce the same
        # user-facing message instead of an unhandled traceback.
        logger.error("ffmpeg is not installed. Please install ffmpeg to use this application.")
        raise gr.Error("ffmpeg is not installed. Please install ffmpeg to use this application.") from err
+# Initialize ffmpeg check
+check_ffmpeg()
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     device=device,
 )
 @spaces.GPU
 def transcribe(inputs, task):
     """Run the module-level ``pipe`` on a submitted audio input.

     Args:
         inputs: Audio input forwarded to ``pipe``; ``None`` means nothing
             was submitted.
         task: Value passed to generation as ``generate_kwargs["task"]``.

     Returns:
         Whatever ``pipe`` returns when called with ``return_timestamps=True``,
         unmodified.

     Raises:
         gr.Error: If ``inputs`` is ``None``.
     """
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

     # NOTE(review): the complete pipeline result is returned here, not only
     # its "text" entry -- confirm the consuming UI component expects that.
     generation_options = {"task": task}
     return pipe(
         inputs,
         batch_size=BATCH_SIZE,
         generate_kwargs=generation_options,
         return_timestamps=True,
     )
 def _return_yt_html_embed(yt_url):
+    try:
+        video_id = yt_url.split("?v=")[-1]
+        HTML_str = (
+            f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
+            " </center>"
+        )
+        return HTML_str
+    except Exception as e:
+        logger.error(f"Error creating embed HTML: {str(e)}")
+        raise gr.Error("Invalid YouTube URL format")
 def _yt_duration_seconds(info):
     """Return the video length in seconds from a yt-dlp info dict (0 if unknown)."""
     duration = info.get("duration")
     if isinstance(duration, (int, float)):
         return int(duration)
     # Fall back to the "H:MM:SS" / "MM:SS" / "SS" string form; a missing or
     # None value counts as zero length instead of raising.
     seconds = 0
     for part in (info.get("duration_string") or "0").split(":"):
         seconds = seconds * 60 + int(part)
     return seconds


 def download_yt_audio(yt_url, filename):
     """Download the audio track of a video with yt-dlp after validating it.

     Metadata is fetched first (without downloading) so that over-long,
     age-restricted or private videos are rejected before any download starts.

     Args:
         yt_url: URL of the video to fetch.
         filename: Value used as yt-dlp's ``outtmpl`` for the download.

     Raises:
         gr.Error: If the video is longer than ``YT_LENGTH_LIMIT_S`` seconds,
             is age-restricted or private, or if extraction/download fails.
     """
     logger.info(f"Starting download for URL: {yt_url}")
     # Configure yt-dlp options
     ydl_opts = {
         "format": "bestaudio/best",
         # NOTE(review): FFmpegExtractAudio re-encodes to mp3 and may write the
         # result under a different extension than `filename` -- confirm the
         # caller reads the right path.
         "postprocessors": [{
             "key": "FFmpegExtractAudio",
             "preferredcodec": "mp3",
             "preferredquality": "192",
         }],
         "outtmpl": filename,
         "quiet": True,
         "no_warnings": True,
         "extract_flat": False,
         "force_generic_extractor": False,
         # NOTE(review): disables TLS certificate verification for downloads;
         # kept for compatibility, but confirm it is really required.
         "nocheckcertificate": True,
         "ignoreerrors": False,
         "logtostderr": False,
         "verbose": False,
     }
     try:
         # First, get video info without downloading
         with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
             logger.info("Extracting video information...")
             info = ydl.extract_info(yt_url, download=False)

         # Check video duration
         file_length_s = _yt_duration_seconds(info)
         if file_length_s > YT_LENGTH_LIMIT_S:
             yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
             file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
             raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")

         # Check if video is age-restricted or private
         if info.get("age_limit") or info.get("is_private"):
             raise gr.Error("This video is age-restricted or private and cannot be processed.")
         logger.info("Video information extracted successfully")

         # Now download the audio
         logger.info("Starting audio download...")
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             ydl.download([yt_url])
         logger.info("Audio download completed successfully")
     except gr.Error:
         # Validation errors raised above already carry the user-facing message;
         # let them through instead of re-wrapping them as "unexpected".
         raise
     except youtube_dl.utils.DownloadError as err:
         logger.error(f"Download error: {str(err)}")
         raise gr.Error(f"Failed to download video: {str(err)}") from err
     except youtube_dl.utils.ExtractorError as err:
         logger.error(f"Extraction error: {str(err)}")
         raise gr.Error(f"Failed to extract video information: {str(err)}") from err
     except Exception as e:
         logger.error(f"Unexpected error: {str(e)}")
         raise gr.Error(f"An unexpected error occurred: {str(e)}") from e
 @spaces.GPU
 def yt_transcribe(yt_url, task, max_filesize=75.0):
     return html_embed_str, text
 demo = gr.Blocks(theme=gr.themes.Ocean())
 mf_transcribe = gr.Interface(

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 transformers
 yt-dlp

 transformers
 yt-dlp
+loguru