MusIre committed on
Commit
7a0b87f
·
1 Parent(s): 88dc3ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -12,14 +12,16 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-large")
12
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
13
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")
14
 
15
-
16
  # Custom preprocessing function
17
  def preprocess_audio(audio_data):
18
  # Apply any custom preprocessing to the audio data here if needed
19
  return processor(audio_data, return_tensors="pt").input_features
20
 
21
  # Function to perform ASR on audio data
22
- def transcribe_audio(input_features):
 
 
 
23
  # Generate token ids
24
  predicted_ids = model.generate(input_features)
25
 
@@ -29,5 +31,5 @@ def transcribe_audio(input_features):
29
  return transcription[0]
30
 
31
  # Create Gradio interface
32
- audio_input = gr.Audio(preprocess=preprocess_audio)
33
- gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()
 
12
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
13
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")
14
 
 
15
  # Custom preprocessing function
16
  def preprocess_audio(audio_data):
17
  # Apply any custom preprocessing to the audio data here if needed
18
  return processor(audio_data, return_tensors="pt").input_features
19
 
20
  # Function to perform ASR on audio data
21
+ def transcribe_audio(audio_data):
22
+ # Preprocess the audio data
23
+ input_features = preprocess_audio(audio_data)
24
+
25
  # Generate token ids
26
  predicted_ids = model.generate(input_features)
27
 
 
31
  return transcription[0]
32
 
33
  # Create Gradio interface
34
+ audio_input = gr.Audio()
35
+ gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()