Spaces:

Athspi
/

Ai-audio

Running

App Files Community

Athspi committed on Jan 12

Commit

6b2690e

verified ·

1 Parent(s): 3d4cc8c

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -16

app.py CHANGED Viewed

@@ -5,9 +5,6 @@ import os
 from pydub import AudioSegment
 from transformers import pipeline
-# Ensure compatible versions of torch and transformers are installed
-# Run: pip install torch==1.13.1 transformers==4.26.1
 # Mapping of model names to Whisper model sizes
 MODELS = {
     "Tiny (Fastest)": "tiny",
@@ -130,6 +127,26 @@ LANGUAGE_NAME_TO_CODE = {
     "Sundanese": "su",
 }
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
     """Transcribe the audio file."""
     # Convert audio to 16kHz mono for better compatibility
@@ -178,7 +195,13 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription with Fine-Tuned Models")
     with gr.Tab("Transcribe Audio"):
         gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
@@ -197,18 +220,8 @@ with gr.Blocks() as demo:
         transcribe_output = gr.Textbox(label="Transcription and Detected Language")
         transcribe_button = gr.Button("Transcribe Audio")
-    # Update model dropdown based on language selection
-    def update_model_dropdown(language):
-        if language in FINE_TUNED_MODELS:
-            # Add "Fine-Tuned Model" to the dropdown choices and disable it
-            return gr.Dropdown(choices=["Fine-Tuned Model"], value="Fine-Tuned Model", interactive=False)
-        else:
-            # Reset the dropdown to standard Whisper models
-            return gr.Dropdown(choices=list(MODELS.keys()), value="Base (Faster)", interactive=True)
-    language_dropdown.change(update_model_dropdown, inputs=language_dropdown, outputs=model_dropdown)
-    # Link button to function
     transcribe_button.click(transcribe_audio, inputs=[transcribe_audio_input, language_dropdown, model_dropdown], outputs=transcribe_output)
 # Launch the Gradio interface

 from pydub import AudioSegment
 from transformers import pipeline
 # Mapping of model names to Whisper model sizes
 MODELS = {
     "Tiny (Fastest)": "tiny",
     "Sundanese": "su",
 }
+def detect_language(audio_file):
+    """Detect the spoken language of an audio file.
+
+    The input is re-encoded to a 16 kHz mono WAV in a temporary file, and
+    Whisper's language-identification head is run on the first 30-second
+    window of that file.
+
+    Args:
+        audio_file: Path to the uploaded audio file (the Gradio ``Audio``
+            component in this app is configured with ``type="filepath"``).
+
+    Returns:
+        A string of the form ``"Detected Language: <code>"``, where
+        ``<code>`` is the most probable Whisper language code (e.g. ``"en"``).
+
+    Raises:
+        Whatever pydub or Whisper raise for unreadable or unsupported input;
+        the temporary file is removed in that case as well.
+    """
+    # Function-scope import so this block does not depend on a file-level
+    # import of tempfile.
+    import tempfile
+
+    # Load the Whisper model (use "base" for faster detection)
+    model = whisper.load_model("base")
+
+    # Convert audio to 16kHz mono for better compatibility with Whisper
+    audio = AudioSegment.from_file(audio_file)
+    audio = audio.set_frame_rate(16000).set_channels(1)
+
+    # Use a unique temporary path per call: a fixed file name would be shared
+    # by concurrent requests, which could overwrite or delete each other's
+    # audio mid-detection.
+    fd, processed_audio_path = tempfile.mkstemp(suffix=".wav")
+    os.close(fd)  # pydub reopens the path itself when exporting
+    try:
+        audio.export(processed_audio_path, format="wav")
+
+        # Use Whisper's dedicated language-detection API rather than
+        # transcribe(): transcribe() documents only the "transcribe" and
+        # "translate" tasks and decodes the entire file, while the language
+        # is decided from the first 30-second window anyway.
+        samples = whisper.pad_or_trim(whisper.load_audio(processed_audio_path))
+        # The default mel-bin count matches the "base" checkpoint loaded above.
+        mel = whisper.log_mel_spectrogram(samples).to(model.device)
+        _, probs = model.detect_language(mel)
+        detected_language = max(probs, key=probs.get)
+    finally:
+        # Clean up processed audio file, even if export or inference failed
+        os.remove(processed_audio_path)
+
+    return f"Detected Language: {detected_language}"
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
     """Transcribe the audio file."""
     # Convert audio to 16kHz mono for better compatibility
 # Define the Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcription and Language Detection")
+    with gr.Tab("Detect Language"):
+        gr.Markdown("Upload an audio file to detect its language.")
+        detect_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        detect_language_output = gr.Textbox(label="Detected Language")
+        detect_button = gr.Button("Detect Language")
     with gr.Tab("Transcribe Audio"):
         gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
         transcribe_output = gr.Textbox(label="Transcription and Detected Language")
         transcribe_button = gr.Button("Transcribe Audio")
+    # Link buttons to functions
+    detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
     transcribe_button.click(transcribe_audio, inputs=[transcribe_audio_input, language_dropdown, model_dropdown], outputs=transcribe_output)
 # Launch the Gradio interface