Athspi committed
Commit df332ec · verified · 1 Parent(s): cf9ed4e

Update app.py

Files changed (1):
  app.py +15 -26
app.py CHANGED
@@ -3,7 +3,7 @@ import whisper
 import torch
 import os
 from pydub import AudioSegment
-from transformers import pipeline
+from huggingsound import SpeechRecognitionModel
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
@@ -14,13 +14,11 @@ MODELS = {
     "Large (Most Accurate)": "large"
 }
 
-# Fine-tuned models for specific languages
-FINE_TUNED_MODELS = {
-    "Tamil": {
-        "model": "vasista22/whisper-tamil-medium",
-        "language": "ta"
-    },
-    # Add more fine-tuned models for other languages here
+# HuggingSound model for Arabic
+HUGGINGSOUND_MODEL = {
+    "Arabic": {
+        "model": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+    }
 }
 
 # Mapping of full language names to language codes
@@ -136,20 +134,11 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
     audio.export(processed_audio_path, format="wav")
 
     # Load the appropriate model
-    if language in FINE_TUNED_MODELS:
-        # Use the fine-tuned Whisper model for the selected language
-        device = "cuda:0" if torch.cuda.is_available() else "cpu"
-        transcribe = pipeline(
-            task="automatic-speech-recognition",
-            model=FINE_TUNED_MODELS[language]["model"],
-            chunk_length_s=30,
-            device=device
-        )
-        # Fix for the `suppress_tokens` issue
-        if hasattr(transcribe.model.config, "suppress_tokens"):
-            transcribe.model.config.suppress_tokens = []  # Set to an empty list
-        result = transcribe(processed_audio_path)
-        transcription = result["text"]
+    if language in HUGGINGSOUND_MODEL:
+        # Use the HuggingSound model for the selected language
+        model = SpeechRecognitionModel(HUGGINGSOUND_MODEL[language]["model"])
+        transcriptions = model.transcribe([processed_audio_path])
+        transcription = transcriptions[0]["transcription"]
         detected_language = language
     else:
         # Use the selected Whisper model
@@ -174,7 +163,7 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
 
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription with Fine-Tuned Models")
+    gr.Markdown("# Audio Transcription with HuggingSound and Whisper")
 
     with gr.Tab("Transcribe Audio"):
         gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
@@ -195,9 +184,9 @@ with gr.Blocks() as demo:
 
     # Update model dropdown based on language selection
     def update_model_dropdown(language):
-        if language in FINE_TUNED_MODELS:
-            # Add "Fine-Tuned Model" to the dropdown choices and disable it
-            return gr.Dropdown(choices=["Fine-Tuned Model"], value="Fine-Tuned Model", interactive=False)
+        if language in HUGGINGSOUND_MODEL:
+            # Add "HuggingSound Model" to the dropdown choices and disable it
+            return gr.Dropdown(choices=["HuggingSound Model"], value="HuggingSound Model", interactive=False)
         else:
            # Reset the dropdown to standard Whisper models
            return gr.Dropdown(choices=list(MODELS.keys()), value="Base (Faster)", interactive=True)
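
For reference, the new HuggingSound path can be exercised outside the Gradio app. Below is a minimal sketch, assuming huggingsound is installed (pip install huggingsound); "sample.wav" is a hypothetical local audio file, and the device keyword is an assumption about the SpeechRecognitionModel constructor, since the diff above relies on its default:

import torch
from huggingsound import SpeechRecognitionModel

# Pick a device the same way the removed transformers path did
device = "cuda" if torch.cuda.is_available() else "cpu"

# Same checkpoint the commit registers for Arabic
model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-arabic", device=device)

# transcribe() takes a list of audio paths and returns one dict per file;
# app.py reads the recognized text from the "transcription" key
transcriptions = model.transcribe(["sample.wav"])  # "sample.wav" is a placeholder path
print(transcriptions[0]["transcription"])

One design note on the committed code: SpeechRecognitionModel is constructed inside transcribe_audio, so the wav2vec2 checkpoint is loaded on every Arabic request; hoisting the model to module level, next to HUGGINGSOUND_MODEL, would avoid the repeated load.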