Update app.py
app.py
CHANGED
@@ -3,7 +3,7 @@ import whisper
 import torch
 import os
 from pydub import AudioSegment
-from transformers import pipeline
+from huggingsound import SpeechRecognitionModel
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
@@ -14,13 +14,11 @@ MODELS = {
     "Large (Most Accurate)": "large"
 }
 
-# …
-FINE_TUNED_MODELS = {
-    "Arabic": {
-        "model": "…",
-        …
-    },
-    # Add more fine-tuned models for other languages here
+# HuggingSound model for Arabic
+HUGGINGSOUND_MODEL = {
+    "Arabic": {
+        "model": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+    }
 }
 
 # Mapping of full language names to language codes
@@ -136,20 +134,11 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
     audio.export(processed_audio_path, format="wav")
 
     # Load the appropriate model
-    if language in FINE_TUNED_MODELS:
-        # Use the fine-tuned model for the selected language
-        …
-        transcribe = pipeline(
-            "automatic-speech-recognition",
-            model=FINE_TUNED_MODELS[language]["model"],
-            chunk_length_s=30,
-            device=device
-        )
-        # Fix for the `suppress_tokens` issue
-        if hasattr(transcribe.model.config, "suppress_tokens"):
-            transcribe.model.config.suppress_tokens = []  # Set to an empty list
-        result = transcribe(processed_audio_path)
-        transcription = result["text"]
+    if language in HUGGINGSOUND_MODEL:
+        # Use the HuggingSound model for the selected language
+        model = SpeechRecognitionModel(HUGGINGSOUND_MODEL[language]["model"])
+        transcriptions = model.transcribe([processed_audio_path])
+        transcription = transcriptions[0]["transcription"]
         detected_language = language
     else:
         # Use the selected Whisper model
@@ -174,7 +163,7 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
 
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription with …")
+    gr.Markdown("# Audio Transcription with HuggingSound and Whisper")
 
     with gr.Tab("Transcribe Audio"):
         gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
@@ -195,9 +184,9 @@ with gr.Blocks() as demo:
 
     # Update model dropdown based on language selection
     def update_model_dropdown(language):
-        if language in FINE_TUNED_MODELS:
-            # Add "…" to the dropdown choices and disable it
-            return gr.Dropdown(choices=["…"], value="…", interactive=False)
+        if language in HUGGINGSOUND_MODEL:
+            # Add "HuggingSound Model" to the dropdown choices and disable it
+            return gr.Dropdown(choices=["HuggingSound Model"], value="HuggingSound Model", interactive=False)
         else:
             # Reset the dropdown to standard Whisper models
             return gr.Dropdown(choices=list(MODELS.keys()), value="Base (Faster)", interactive=True)
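Review note: as written, the new branch constructs a fresh SpeechRecognitionModel inside transcribe_audio on every request, which reloads the checkpoint each time. A minimal sketch of caching the model at module scope instead; the _HS_CACHE dict and the get_hs_model/transcribe_with_hs helpers are illustrative names, not part of this app.

from huggingsound import SpeechRecognitionModel

# Illustrative module-level cache: construct each checkpoint once,
# not on every call to transcribe_audio.
_HS_CACHE = {}

def get_hs_model(checkpoint):
    # Load lazily on first use, then reuse the instance.
    if checkpoint not in _HS_CACHE:
        _HS_CACHE[checkpoint] = SpeechRecognitionModel(checkpoint)
    return _HS_CACHE[checkpoint]

def transcribe_with_hs(path):
    model = get_hs_model("jonatasgrosman/wav2vec2-large-xlsr-53-arabic")
    # model.transcribe takes a list of audio paths and returns a list of
    # dicts whose "transcription" key holds the recognized text.
    transcriptions = model.transcribe([path])
    return transcriptions[0]["transcription"]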
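The hunk context shows the app already exporting uploads to WAV with pydub. One detail worth double-checking: jonatasgrosman/wav2vec2-large-xlsr-53-arabic, like other wav2vec2 XLSR checkpoints, expects 16 kHz audio. huggingsound may resample internally, but normalizing up front keeps both the HuggingSound and Whisper paths consistent. preprocess_audio below is a hypothetical helper, not code from this app:

from pydub import AudioSegment

def preprocess_audio(audio_file, processed_audio_path="processed.wav"):
    # wav2vec2 XLSR checkpoints are trained on 16 kHz mono audio, so
    # normalize the upload before handing it to either recognizer.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_frame_rate(16000).set_channels(1)
    audio.export(processed_audio_path, format="wav")
    return processed_audio_path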