Spaces:

Ivan000
/

whisper-large-v3-turbo

Sleeping

App Files Files Community

Ivan000 committed on Dec 10, 2024

Commit

bbaed16

verified ·

1 Parent(s): 2acd67b

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -7

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 # app.py
 # =============
-# This is a complete app.py file for an automatic speech recognition app using the openai/whisper-large-v3-turbo model.
 # The app is built using Gradio and Hugging Face Transformers, and it runs on the CPU to avoid video memory usage.
 import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-import gradio as gr
 # Set device to CPU
 device = "cpu"
@@ -31,26 +31,37 @@ pipe = pipeline(
     device=device,
 )
-def transcribe_audio(audio_file):
     """
     Transcribe the given audio file using the Whisper model.
     Parameters:
     audio_file (str): Path to the audio file.
     Returns:
     str: Transcribed text.
     """
-    result = pipe(audio_file)
     return result["text"]
 # Define the Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
-    inputs=gr.Audio(label="Upload Audio", type="filepath"),
     outputs=gr.Textbox(label="Transcription"),
     title="Whisper ASR Demo",
-    description="Upload an audio file and get the transcribed text using the openai/whisper-large-v3-turbo model.",
 )
 # Launch the Gradio app

 # app.py
 # =============
+# This is a complete app.py file for an automatic speech recognition (ASR) app using the openai/whisper-large-v3-turbo model.
 # The app is built using Gradio and Hugging Face Transformers, and it runs on the CPU to avoid video memory usage.
 import torch
+        from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+        import gradio as gr
 # Set device to CPU
 device = "cpu"
     device=device,
 )
+# Define the transcription function
+def transcribe_audio(audio_file, language):
     """
     Transcribe the given audio file using the Whisper model.
     Parameters:
     audio_file (str): Path to the audio file.
+    language (str): Language code for transcription.
     Returns:
     str: Transcribed text.
     """
+    generate_kwargs = {"language": language}
+    result = pipe(audio_file, generate_kwargs=generate_kwargs)
     return result["text"]
 # Define the Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
+    inputs=[
+        gr.Audio(label="Upload Audio", type="filepath"),
+        gr.Dropdown(
+            label="Select Language",
+            choices=["en", "ru", "es", "fr", "de", "zh", "ja", "ko", "pt", "it"],
+            value="en",
+            description="Select the language for transcription."
+        )
+    ],
     outputs=gr.Textbox(label="Transcription"),
     title="Whisper ASR Demo",
+    description="Upload an audio file and select the language to get the transcribed text using the openai/whisper-large-v3-turbo model.",
 )
 # Launch the Gradio app