Spaces:
mskov
/
Runtime error

mskov committed on
Commit
3036933
·
1 Parent(s): d51a20c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -5,21 +5,28 @@ import whisper
5
 
6
  from share_btn import community_icon_html, loading_icon_html, share_js
7
 
 
8
  model = whisper.load_model("tiny")
9
 
10
 
11
 
12
  def inference(audio):
 
13
  audio = whisper.load_audio(audio)
 
14
  audio = whisper.pad_or_trim(audio)
15
-
 
16
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
17
 
18
  _, probs = model.detect_language(mel)
19
-
 
20
  options = whisper.DecodingOptions(fp16 = False)
 
21
  result = whisper.decode(model, mel, options)
22
-
 
23
  print(result.text)
24
  return result.text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
25
 
@@ -172,6 +179,7 @@ with block:
172
  with gr.Group():
173
  with gr.Box():
174
  with gr.Row().style(mobile_collapse=False, equal_height=True):
 
175
  audio = gr.Audio(
176
  label="Input Audio",
177
  show_label=False,
 
5
 
6
  from share_btn import community_icon_html, loading_icon_html, share_js
7
 
8
+ # whisper model specification
9
  model = whisper.load_model("tiny")
10
 
11
 
12
 
13
  def inference(audio):
14
+ # load audio data
15
  audio = whisper.load_audio(audio)
16
+ # pad or trim the audio to the fixed length the model expects
17
  audio = whisper.pad_or_trim(audio)
18
+
19
+ # generate a log-mel spectrogram of the audio data
20
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
21
 
22
  _, probs = model.detect_language(mel)
23
+
24
+ # configure decoding options (fp16 disabled)
25
  options = whisper.DecodingOptions(fp16 = False)
26
+ # transcribe speech to text
27
  result = whisper.decode(model, mel, options)
28
+
29
+ # print the transcribed text
30
  print(result.text)
31
  return result.text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
32
 
 
179
  with gr.Group():
180
  with gr.Box():
181
  with gr.Row().style(mobile_collapse=False, equal_height=True):
182
+ # get audio from microphone
183
  audio = gr.Audio(
184
  label="Input Audio",
185
  show_label=False,