muhtasham committed on
Commit
1e5e969
·
1 Parent(s): 07c515e
Files changed (1) hide show
  1. app.py +27 -33
app.py CHANGED
@@ -1,14 +1,10 @@
1
- import spaces
2
- import torch
3
  import gradio as gr
4
- from transformers import pipeline
5
- from transformers.pipelines.audio_utils import ffmpeg_read
6
  import subprocess
7
  from loguru import logger
8
 
9
- MODEL_NAME = "muhtasham/whisper-tg"
10
- BATCH_SIZE = 8
11
- FILE_LIMIT_MB = 1000
12
 
13
  # Check if ffmpeg is installed
14
  def check_ffmpeg():
@@ -21,33 +17,33 @@ def check_ffmpeg():
21
  # Initialize ffmpeg check
22
  check_ffmpeg()
23
 
24
- device = 0 if torch.cuda.is_available() else "cpu"
25
-
26
- pipe = pipeline(
27
- task="automatic-speech-recognition",
28
- model=MODEL_NAME,
29
- chunk_length_s=30,
30
- device=device,
31
- )
32
-
33
- print(pipe)
34
-
35
- @spaces.GPU
36
  def transcribe(inputs):
37
  if inputs is None:
38
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
39
 
40
- result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
41
-
42
- # Format timestamps with text
43
- timestamps = []
44
- for chunk in result["chunks"]:
45
- start_time = chunk["timestamp"][0]
46
- end_time = chunk["timestamp"][1]
47
- text = chunk["text"].strip()
48
- timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text} \n \n")
49
-
50
- return result["text"], "\n".join(timestamps)
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  demo = gr.Blocks(theme=gr.themes.Ocean())
53
 
@@ -62,9 +58,7 @@ mf_transcribe = gr.Interface(
62
  ],
63
  title="Whisper Large V3 Turbo: Transcribe Audio",
64
  description=(
65
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
66
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
67
- " of arbitrary length."
68
  ),
69
  allow_flagging="never",
70
  )
 
 
 
1
  import gradio as gr
2
+ import requests
 
3
  import subprocess
4
  from loguru import logger
5
 
6
+ API_URL = "https://skdpcqcdd929o4k3.us-east-1.aws.endpoints.huggingface.cloud"
7
+
 
8
 
9
  # Check if ffmpeg is installed
10
  def check_ffmpeg():
 
17
  # Initialize ffmpeg check
18
  check_ffmpeg()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
def transcribe(inputs):
    """Transcribe an audio file by POSTing it to the remote inference endpoint.

    Parameters
    ----------
    inputs : str | None
        Filesystem path to the uploaded/recorded audio, as supplied by the
        Gradio audio component (``type="filepath"`` — TODO confirm against
        the interface definition).

    Returns
    -------
    tuple[str, str]
        The full transcript text, and a newline-joined string of
        ``[start s - end s] text`` segments built from ``result["chunks"]``.

    Raises
    ------
    gr.Error
        If no file was submitted, or if the request/response handling fails.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    headers = {
        "Accept": "application/json",
        # NOTE(review): content type is hard-coded regardless of the actual
        # file format — presumably the endpoint sniffs the codec; verify that
        # non-FLAC uploads are accepted.
        "Content-Type": "audio/flac"
    }

    try:
        with open(inputs, "rb") as f:
            data = f.read()
        # A timeout keeps a stalled/cold endpoint from hanging the UI forever.
        response = requests.post(API_URL, headers=headers, data=data, timeout=600)
        # Surface HTTP-level failures (auth, 5xx, endpoint scaling up) as the
        # real cause instead of a confusing JSON/KeyError further down.
        response.raise_for_status()
        result = response.json()

        # Format each chunk as "[start s - end s] text".
        timestamps = []
        for chunk in result["chunks"]:
            start_time = chunk["timestamp"][0]
            end_time = chunk["timestamp"][1]
            text = chunk["text"].strip()
            timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text} \n \n")

        return result["text"], "\n".join(timestamps)
    except Exception as e:
        # UI boundary: log the full detail, then re-raise as a user-visible error.
        logger.error(f"Error during transcription: {str(e)}")
        raise gr.Error(f"Failed to transcribe audio: {str(e)}")
47
 
48
  demo = gr.Blocks(theme=gr.themes.Ocean())
49
 
 
58
  ],
59
  title="Whisper Large V3 Turbo: Transcribe Audio",
60
  description=(
61
+ "Transcribe long-form microphone or audio inputs with the click of a button! "
 
 
62
  ),
63
  allow_flagging="never",
64
  )