Spaces: Running on Zero

initial commit

Browse files
- README.md +22 -1
- app.py +109 -0
- requirements.txt +0 -0
README.md CHANGED
@@ -11,4 +11,25 @@ license: mit
 short_description: Compare OpenAI Whisper against SenseVoice Small results
 ---
 
-
+# Whisper vs. FunASR SenseVoice Comparison
+
+This Space lets you compare OpenAI Whisper variants against FunAudioLLM’s SenseVoice models for automatic speech recognition (ASR), all via a simple Gradio 5 UI.
+
+## 🚀 Demo
+
+1. **Select Whisper model** from the dropdown.
+2. **Select SenseVoice model** from the dropdown.
+3. (Optional) **Toggle punctuation** for SenseVoice.
+4. **Upload** an audio file (wav, mp3, etc.) or **record** with your microphone.
+5. Click **Transcribe** to run both ASRs side by side.
+
+## 📁 Files
+
+- **app.py**
+  Main Gradio application. Sets up two Hugging Face ASR pipelines and displays their outputs.
+
+- **requirements.txt**
+  Python dependencies: Gradio, Transformers, Torch, Torchaudio, Accelerate, ffmpeg-python.
+
+- **README.md**
+  This documentation.
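Under the hood, clicking **Transcribe** runs each selected model through a transformers `automatic-speech-recognition` pipeline (see app.py below). A minimal standalone sketch of that call, with an assumed model ID and a placeholder file name:

```python
# Minimal sketch of the pipeline call this Space makes for the Whisper side.
# Assumes torch and transformers are installed; "sample.wav" is a placeholder.
import torch
from transformers import pipeline

device = 0 if torch.cuda.is_available() else -1  # GPU index 0, else CPU
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",  # any ID from the dropdown works
    device=device,
)
print(asr("sample.wav")["text"].strip())
```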
app.py ADDED
@@ -0,0 +1,109 @@
+# app.py
+import spaces
+import re
+import torch
+import gradio as gr
+from transformers import pipeline
+
+# List of Whisper model IDs
+WHISPER_MODELS = [
+    "openai/whisper-large-v3-turbo",
+    "openai/whisper-large-v3",
+    "openai/whisper-tiny",
+    "openai/whisper-small",
+    "openai/whisper-medium",
+    "openai/whisper-base",
+    "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW",
+    "Jingmiao/whisper-small-zh_tw",
+    "DDTChen/whisper-medium-zh-tw",
+    "kimbochen/whisper-small-zh-tw",
+    "ChrisTorng/whisper-large-v3-turbo-common_voice_19_0-zh-TW-ct2",
+    "JacobLinCool/whisper-large-v3-turbo-zh-TW-clean-1",
+    "JunWorks/whisper-small-zhTW",
+    "WANGTINGTING/whisper-large-v2-zh-TW-vol2",
+    "xmzhu/whisper-tiny-zh-TW",
+    "ingrenn/whisper-small-common-voice-13-zh-TW",
+    "jun-han/whisper-small-zh-TW",
+    "xmzhu/whisper-tiny-zh-TW-baseline",
+    "JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2",
+    "JacobLinCool/whisper-large-v3-common_voice_19_0-zh-TW-full-1",
+    "momo103197/whisper-small-zh-TW-mix",
+    "JacobLinCool/whisper-large-v3-turbo-zh-TW-clean-1-merged",
+    "JacobLinCool/whisper-large-v2-common_voice_19_0-zh-TW-full-1",
+    "kimas1269/whisper-meduim_zhtw",
+    "JunWorks/whisper-base-zhTW",
+    "JunWorks/whisper-small-zhTW-frozenDecoder",
+    "sandy1990418/whisper-large-v3-turbo-zh-tw",
+    "JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-pissa-merged",
+    "momo103197/whisper-small-zh-TW-16",
+    "k1nto/Belle-whisper-large-v3-zh-punct-ct2"
+]
+
+# List of SenseVoice model IDs
+SENSEVOICE_MODELS = [
+    "FunAudioLLM/SenseVoiceSmall",
+    "AXERA-TECH/SenseVoice",
+    "alextomcat/SenseVoiceSmall",
+    "ChenChenyu/SenseVoiceSmall-finetuned",
+    "apinge/sensevoice-small"
+]
+
+# Cache loaded pipelines so each model is downloaded and initialized only once
+pipes = {}
+
+def get_asr_pipe(model_id):
+    if model_id not in pipes:
+        # Run on GPU if available, otherwise fall back to CPU
+        device = 0 if torch.cuda.is_available() else -1
+        pipes[model_id] = pipeline("automatic-speech-recognition", model=model_id, device=device)
+    return pipes[model_id]
+
+@spaces.GPU
+def transcribe(whisper_model, sense_model, audio_path, enable_punct):
+    # 1) Whisper
+    whisper_pipe = get_asr_pipe(whisper_model)
+    whisper_out = whisper_pipe(audio_path)
+    text_whisper = whisper_out.get("text", "").strip()
+
+    # 2) SenseVoice
+    sense_pipe = get_asr_pipe(sense_model)
+    sense_out = sense_pipe(audio_path)
+    text_sense = sense_out.get("text", "").strip()
+
+    # 3) Strip punctuation if the checkbox is disabled
+    if not enable_punct:
+        text_sense = re.sub(r"[^\w\s]", "", text_sense)
+
+    return text_whisper, text_sense
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Whisper vs. FunASR SenseVoice Comparison")
+    with gr.Row():
+        whisper_dd = gr.Dropdown(
+            choices=WHISPER_MODELS,
+            value=WHISPER_MODELS[0],
+            label="Whisper Model"
+        )
+        sense_dd = gr.Dropdown(
+            choices=SENSEVOICE_MODELS,
+            value=SENSEVOICE_MODELS[0],
+            label="SenseVoice Model"
+        )
+    punct = gr.Checkbox(label="Enable Punctuation (SenseVoice)", value=True)
+    audio_in = gr.Audio(
+        sources=["upload", "microphone"],  # Gradio 4/5 API; "source" is no longer accepted
+        type="filepath",
+        label="Upload or Record Audio"
+    )
+    with gr.Row():
+        out_whisper = gr.Textbox(label="Whisper Transcript")
+        out_sense = gr.Textbox(label="SenseVoice Transcript")
+    btn = gr.Button("Transcribe")
+    btn.click(
+        fn=transcribe,
+        inputs=[whisper_dd, sense_dd, audio_in, punct],
+        outputs=[out_whisper, out_sense]
+    )
+
+if __name__ == "__main__":
+    demo.launch()
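A caveat on the SenseVoice side: the SENSEVOICE_MODELS entries are FunASR checkpoints, and it is not certain that transformers' `automatic-speech-recognition` pipeline can load them as written above. If it cannot, the path documented on the SenseVoiceSmall model card is FunASR's own `AutoModel` API. A minimal sketch, assuming the `funasr` package is installed and with a placeholder audio path:

```python
# Sketch of loading SenseVoiceSmall through FunASR instead of transformers,
# following the model card's documented usage; "sample.wav" is a placeholder.
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

model = AutoModel(
    model="FunAudioLLM/SenseVoiceSmall",
    trust_remote_code=True,
    device="cuda:0",  # or "cpu"
)
res = model.generate(
    input="sample.wav",
    language="auto",  # auto-detect language
    use_itn=True,     # inverse text normalization (punctuation, numbers)
)
print(rich_transcription_postprocess(res[0]["text"]))
```

Passing `use_itn=False` should suppress most punctuation at the source, which would make the regex stripping in `transcribe` largely redundant.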
requirements.txt ADDED
File without changes
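Although the committed requirements.txt is empty, app.py imports gradio, torch, and transformers, and the README names the intended dependency list. A plausible starting point (package set taken from the README, versions left unpinned; the `spaces` module is typically preinstalled on ZeroGPU Spaces):

```text
gradio
transformers
torch
torchaudio
accelerate
ffmpeg-python
```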