Update app.py
app.py
CHANGED
@@ -3,7 +3,7 @@ import whisper
 import torch
 import os
 from pydub import AudioSegment
-from transformers import pipeline
+from huggingsound import SpeechRecognitionModel
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
@@ -14,13 +14,11 @@ MODELS = {
     "Large (Most Accurate)": "large"
 }
 
-# …
-FINE_TUNED_MODELS = {
-    "Arabic": {
-        "model": "…",
-        …
-    },
-    # Add more fine-tuned models for other languages here
+# HuggingSound model for Arabic
+HUGGINGSOUND_MODEL = {
+    "Arabic": {
+        "model": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+    }
 }
 
 # Mapping of full language names to language codes
@@ -136,20 +134,11 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
     audio.export(processed_audio_path, format="wav")
 
     # Load the appropriate model
-    if language in FINE_TUNED_MODELS:
-        # Use the fine-tuned model for the selected language
-        …
-        transcribe = pipeline(
-            "automatic-speech-recognition",
-            model=FINE_TUNED_MODELS[language]["model"],
-            chunk_length_s=30,
-            device=device
-        )
-        # Fix for the `suppress_tokens` issue
-        if hasattr(transcribe.model.config, "suppress_tokens"):
-            transcribe.model.config.suppress_tokens = []  # Set to an empty list
-        result = transcribe(processed_audio_path)
-        transcription = result["text"]
+    if language in HUGGINGSOUND_MODEL:
+        # Use the HuggingSound model for the selected language
+        model = SpeechRecognitionModel(HUGGINGSOUND_MODEL[language]["model"])
+        transcriptions = model.transcribe([processed_audio_path])
+        transcription = transcriptions[0]["transcription"]
         detected_language = language
     else:
         # Use the selected Whisper model
@@ -174,7 +163,7 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
 
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription with …")
+    gr.Markdown("# Audio Transcription with HuggingSound and Whisper")
 
     with gr.Tab("Transcribe Audio"):
         gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
@@ -195,9 +184,9 @@ with gr.Blocks() as demo:
 
     # Update model dropdown based on language selection
     def update_model_dropdown(language):
-        if language in FINE_TUNED_MODELS:
-            # Add "…" to the dropdown choices and disable it
-            return gr.Dropdown(choices=["…"], value="…", interactive=False)
+        if language in HUGGINGSOUND_MODEL:
+            # Add "HuggingSound Model" to the dropdown choices and disable it
+            return gr.Dropdown(choices=["HuggingSound Model"], value="HuggingSound Model", interactive=False)
         else:
             # Reset the dropdown to standard Whisper models
             return gr.Dropdown(choices=list(MODELS.keys()), value="Base (Faster)", interactive=True)
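Review note: as written, the new branch constructs a fresh SpeechRecognitionModel inside transcribe_audio on every request, which reloads the checkpoint each time. A minimal sketch of caching the model at module scope instead; the _HS_CACHE dict and the get_hs_model/transcribe_with_hs helpers are illustrative names, not part of this app.

from huggingsound import SpeechRecognitionModel

# Illustrative module-level cache: construct each checkpoint once,
# not on every call to transcribe_audio.
_HS_CACHE = {}

def get_hs_model(checkpoint):
    # Load lazily on first use, then reuse the instance.
    if checkpoint not in _HS_CACHE:
        _HS_CACHE[checkpoint] = SpeechRecognitionModel(checkpoint)
    return _HS_CACHE[checkpoint]

def transcribe_with_hs(path):
    model = get_hs_model("jonatasgrosman/wav2vec2-large-xlsr-53-arabic")
    # model.transcribe takes a list of audio paths and returns a list of
    # dicts whose "transcription" key holds the recognized text.
    transcriptions = model.transcribe([path])
    return transcriptions[0]["transcription"]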
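The hunk context shows the app already exporting uploads to WAV with pydub. One detail worth double-checking: jonatasgrosman/wav2vec2-large-xlsr-53-arabic, like other wav2vec2 XLSR checkpoints, expects 16 kHz audio. huggingsound may resample internally, but normalizing up front keeps both the HuggingSound and Whisper paths consistent. preprocess_audio below is a hypothetical helper, not code from this app:

from pydub import AudioSegment

def preprocess_audio(audio_file, processed_audio_path="processed.wav"):
    # wav2vec2 XLSR checkpoints are trained on 16 kHz mono audio, so
    # normalize the upload before handing it to either recognizer.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_frame_rate(16000).set_channels(1)
    audio.export(processed_audio_path, format="wav")
    return processed_audio_path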