Luigi commited on
Commit
a8b6e59
·
1 Parent(s): 989fd65

initial commit

Browse files
Files changed (3) hide show
  1. README.md +22 -1
  2. app.py +109 -0
  3. requirements.txt +0 -0
README.md CHANGED
@@ -11,4 +11,25 @@ license: mit
11
  short_description: Compare OpenAI Whisper against SenseVoice Small Results
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  short_description: Compare OpenAI Whisper against SenseVoice Small Results
12
  ---
13
 
14
+ # Whisper vs. FunASR SenseVoice Comparison
15
+
16
+ This Space lets you compare OpenAI Whisper variants against FunAudioLLM’s SenseVoice models for automatic speech recognition (ASR), all via a simple Gradio 5 UI.
17
+
18
+ ## 🚀 Demo
19
+
20
+ 1. **Select Whisper model** from the dropdown.
21
+ 2. **Select SenseVoice model** from the dropdown.
22
+ 3. (Optional) **Toggle punctuation** for SenseVoice.
23
+ 4. **Upload** an audio file (wav, mp3, etc.) or **record** with your microphone.
24
+ 5. Click **Transcribe** to run both ASRs side-by-side.
25
+
26
+ ## 📁 Files
27
+
28
+ - **app.py**
29
+ Main Gradio application. Sets up two HF-ASR pipelines and displays their outputs.
30
+
31
+ - **requirements.txt**
32
+ Python dependencies: Gradio, Transformers, Torch, Torchaudio, Accelerate, ffmpeg-python.
33
+
34
+ - **README.md**
35
+ This documentation.
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import spaces
3
+ import re
4
+ import torch
5
+ import gradio as gr
6
+ from transformers import pipeline
7
+
8
+ # List of Whisper model IDs
9
+ WHISPER_MODELS = [
10
+ "openai/whisper-large-v3-turbo",
11
+ "openai/whisper-large-v3",
12
+ "openai/whisper-tiny",
13
+ "openai/whisper-small",
14
+ "openai/whisper-medium",
15
+ "openai/whisper-base",
16
+ "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW",
17
+ "Jingmiao/whisper-small-zh_tw",
18
+ "DDTChen/whisper-medium-zh-tw",
19
+ "kimbochen/whisper-small-zh-tw",
20
+ "ChrisTorng/whisper-large-v3-turbo-common_voice_19_0-zh-TW-ct2",
21
+ "JacobLinCool/whisper-large-v3-turbo-zh-TW-clean-1",
22
+ "JunWorks/whisper-small-zhTW",
23
+ "WANGTINGTING/whisper-large-v2-zh-TW-vol2",
24
+ "xmzhu/whisper-tiny-zh-TW",
25
+ "ingrenn/whisper-small-common-voice-13-zh-TW",
26
+ "jun-han/whisper-small-zh-TW",
27
+ "xmzhu/whisper-tiny-zh-TW-baseline",
28
+ "JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2",
29
+ "JacobLinCool/whisper-large-v3-common_voice_19_0-zh-TW-full-1",
30
+ "momo103197/whisper-small-zh-TW-mix",
31
+ "JacobLinCool/whisper-large-v3-turbo-zh-TW-clean-1-merged",
32
+ "JacobLinCool/whisper-large-v2-common_voice_19_0-zh-TW-full-1",
33
+ "kimas1269/whisper-meduim_zhtw",
34
+ "JunWorks/whisper-base-zhTW",
35
+ "JunWorks/whisper-small-zhTW-frozenDecoder",
36
+ "sandy1990418/whisper-large-v3-turbo-zh-tw",
37
+ "JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-pissa-merged",
38
+ "momo103197/whisper-small-zh-TW-16",
39
+ "k1nto/Belle-whisper-large-v3-zh-punct-ct2"
40
+ ]
41
+
42
+ # List of SenseVoice model IDs
43
+ SENSEVOICE_MODELS = [
44
+ "FunAudioLLM/SenseVoiceSmall",
45
+ "AXERA-TECH/SenseVoice",
46
+ "alextomcat/SenseVoiceSmall",
47
+ "ChenChenyu/SenseVoiceSmall-finetuned",
48
+ "apinge/sensevoice-small"
49
+ ]
50
+
51
+ # Cache pipelines
52
+ pipes = {}
53
+
54
def get_asr_pipe(model_id):
    """Return a cached ASR pipeline for *model_id*, building it on first use.

    The pipeline is placed on GPU 0 when CUDA is available, otherwise on CPU.
    """
    cached = pipes.get(model_id)
    if cached is None:
        # run on GPU if available
        target_device = 0 if torch.cuda.is_available() else -1
        cached = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            device=target_device,
        )
        pipes[model_id] = cached
    return cached
60
+
61
@spaces.GPU
def transcribe(whisper_model, sense_model, audio_path, enable_punct):
    """Transcribe *audio_path* with both models and return the two texts.

    Returns a ``(whisper_text, sensevoice_text)`` tuple. When
    *enable_punct* is False, punctuation is stripped from the SenseVoice
    transcript only.
    """
    # Run the Whisper model first, then the SenseVoice model, reusing the
    # process-wide pipeline cache for both.
    transcripts = []
    for model_id in (whisper_model, sense_model):
        asr = get_asr_pipe(model_id)
        result = asr(audio_path)
        transcripts.append(result.get("text", "").strip())
    text_whisper, text_sense = transcripts

    # strip punctuation (anything that is not a word char or whitespace)
    # from the SenseVoice output if the checkbox is off
    if not enable_punct:
        text_sense = re.sub(r"[^\w\s]", "", text_sense)

    return text_whisper, text_sense
78
+
79
# Build the Gradio UI: two model pickers, a punctuation toggle, one audio
# input, and side-by-side transcript outputs.
with gr.Blocks() as demo:
    gr.Markdown("## Whisper vs. FunASR SenseVoice Comparison")
    with gr.Row():
        whisper_dd = gr.Dropdown(
            choices=WHISPER_MODELS,
            value=WHISPER_MODELS[0],
            label="Whisper Model"
        )
        sense_dd = gr.Dropdown(
            choices=SENSEVOICE_MODELS,
            value=SENSEVOICE_MODELS[0],
            label="SenseVoice Model"
        )
    punct = gr.Checkbox(label="Enable Punctuation (SenseVoice)", value=True)
    # BUG FIX: gr.Audio takes `sources` (a list) in Gradio 4/5; the old
    # `source="upload+microphone"` keyword raises a TypeError at startup
    # ("upload+microphone" was never a valid single-source value either).
    audio_in = gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="Upload or Record Audio"
    )
    with gr.Row():
        out_whisper = gr.Textbox(label="Whisper Transcript")
        out_sense = gr.Textbox(label="SenseVoice Transcript")
    btn = gr.Button("Transcribe")
    btn.click(
        fn=transcribe,
        inputs=[whisper_dd, sense_dd, audio_in, punct],
        outputs=[out_whisper, out_sense]
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
File without changes