Ivan000 committed on
Commit
bbaed16
·
verified ·
1 Parent(s): 2acd67b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -1,11 +1,11 @@
1
  # app.py
2
  # =============
3
- # This is a complete app.py file for an automatic speech recognition app using the openai/whisper-large-v3-turbo model.
4
  # The app is built using Gradio and Hugging Face Transformers, and it runs on the CPU to avoid video memory usage.
5
 
6
  import torch
7
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
8
- import gradio as gr
9
 
10
  # Set device to CPU
11
  device = "cpu"
@@ -31,26 +31,37 @@ pipe = pipeline(
31
  device=device,
32
  )
33
 
34
- def transcribe_audio(audio_file):
 
35
  """
36
  Transcribe the given audio file using the Whisper model.
37
 
38
  Parameters:
39
  audio_file (str): Path to the audio file.
 
40
 
41
  Returns:
42
  str: Transcribed text.
43
  """
44
- result = pipe(audio_file)
 
45
  return result["text"]
46
 
47
  # Define the Gradio interface
48
  iface = gr.Interface(
49
  fn=transcribe_audio,
50
- inputs=gr.Audio(label="Upload Audio", type="filepath"),
 
 
 
 
 
 
 
 
51
  outputs=gr.Textbox(label="Transcription"),
52
  title="Whisper ASR Demo",
53
- description="Upload an audio file and get the transcribed text using the openai/whisper-large-v3-turbo model.",
54
  )
55
 
56
  # Launch the Gradio app
 
1
  # app.py
2
  # =============
3
+ # This is a complete app.py file for an Automatic Speech Recognition (ASR) app using the openai/whisper-large-v3-turbo model.
4
  # The app is built using Gradio and Hugging Face Transformers, and it runs on the CPU to avoid video memory usage.
5
 
6
  import torch
7
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
8
+ import gradio as gr
9
 
10
  # Set device to CPU
11
  device = "cpu"
 
31
  device=device,
32
  )
33
 
34
# Transcription entry point wired to the Gradio interface below.
def transcribe_audio(audio_file, language):
    """
    Run the Whisper pipeline on an uploaded audio file.

    Parameters:
    audio_file (str): Path to the audio file on disk (Gradio `filepath` input).
    language (str): Language code (e.g. "en") forwarded to Whisper's decoder.

    Returns:
    str: The transcribed text.
    """
    # Forward the language choice through generate_kwargs so Whisper decodes
    # in the selected language instead of auto-detecting it.
    output = pipe(audio_file, generate_kwargs={"language": language})
    return output["text"]
49
 
50
# Define the Gradio interface: audio upload + language dropdown in,
# transcription text out.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.Dropdown(
            label="Select Language",
            choices=["en", "ru", "es", "fr", "de", "zh", "ja", "ko", "pt", "it"],
            value="en",
            # BUG FIX: gr.Dropdown has no `description` parameter — helper
            # text under a component is passed as `info=`; `description=`
            # raises TypeError when the component is constructed.
            info="Select the language for transcription.",
        ),
    ],
    outputs=gr.Textbox(label="Transcription"),
    title="Whisper ASR Demo",
    description="Upload an audio file and select the language to get the transcribed text using the openai/whisper-large-v3-turbo model.",
)
66
 
67
  # Launch the Gradio app