Harshad Bhandwaldar committed on
Commit
e320838
·
1 Parent(s): 5c3505e

model added

Browse files
Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- #os.system("pip install nemo_toolkit['all']")
3
  import pytube
4
  import gradio as gr
5
 
@@ -10,19 +10,21 @@ model = nemo_asr.models.EncDecCTCModel.from_pretrained(
10
  model_name="stt_en_quartznet15x5"
11
  )
12
 
13
- def speech_youtube(x):
14
- data = pytube.YouTube(x)
15
- audio = data.streams.get_audio_only()
16
- text = model.transcribe(audio.download())
17
- return text['text']
18
 
19
def speech_file(x):
    """Run ASR on the audio file whose path is given by ``x``.

    NOTE(review): ``model`` is a module-level NeMo ASR model created
    elsewhere in this file — presumably ``EncDecCTCModel``; verify there.
    """
    # transcribe() takes a list of file paths; wrap the single path.
    transcription = model.transcribe([f"{x}"])
    return transcription
22
 
23
- def speech_record(x):
24
- text = model.transcribe(x)
25
- return text['text']
26
 
27
  css = """
28
  .gradio-container {
@@ -113,23 +115,23 @@ with gr.Blocks(css = css) as demo:
113
  # Speech to Text Transcriptions!
114
  This demo uses the OpenAI whisper model which is trained on a large dataset of diverse audio that can perform multilingual speech recognition. The computation time is dependent on the length of the audio.
115
  """)
116
- with gr.Tab("YouTube"):
117
- audio_input = gr.Textbox(label="YouTube Link", placeholder="paste the youtube link here")
118
- text_output = gr.Textbox(label="Transcription", show_label=False)
119
- youtube_button = gr.Button("Transcribe")
120
  with gr.Tab("Audio File"):
121
  with gr.Row().style(equal_height=True):
122
  audio_input2 = gr.Audio(label="Audio File", type="filepath")
123
  text_output2 = gr.Textbox(label="Transcription", show_label=False)
124
  file_button = gr.Button("Transcribe")
125
- with gr.Tab("Record"):
126
- with gr.Row().style(equal_height=True):
127
- audio_input3 = gr.Audio(label="Input Audio", source="microphone", type="filepath")
128
- text_output3 = gr.Textbox(label="Transcription", show_label=False)
129
- rec_button = gr.Button("Transcribe")
130
  gr.HTML('''
131
  <div class="footer">
132
- <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a> - Gradio Demo by 👩🏽‍🦱 <a href="https://www.linkedin.com/in/oayodeji/" style="text-decoration: underline;" target="_blank">Wvle</a>
133
  </p>
134
  </div>
135
  ''')
 
1
  import os
2
+ os.system("pip install nemo_toolkit['all']")
3
  import pytube
4
  import gradio as gr
5
 
 
10
  model_name="stt_en_quartznet15x5"
11
  )
12
 
13
+ # def speech_youtube(x):
14
+ # data = pytube.YouTube(x)
15
+ # audio = data.streams.get_audio_only()
16
+ # text = model.transcribe(audio.download())
17
+ # return text['text']
18
 
19
def speech_file(x):
    """Transcribe a single audio file with the module-level ASR model.

    Parameters
    ----------
    x :
        Filesystem path to the audio file (the gradio Audio component
        elsewhere in this file uses ``type="filepath"``, so ``x`` is a
        path string — TODO confirm against the caller wiring).

    Returns
    -------
    Whatever ``model.transcribe`` returns for a one-element path list
    (for NeMo CTC models this is a list with one transcription).

    NOTE(review): relies on the module-level ``model`` defined earlier
    in the file.
    """
    # Leftover debug print(x)/print(text) calls removed — they only
    # spammed stdout and were not part of the function's contract.
    # transcribe() expects a list of paths; wrap the single path.
    text = model.transcribe([f"{x}"])
    return text
24
 
25
+ # def speech_record(x):
26
+ # text = model.transcribe(x)
27
+ # return text['text']
28
 
29
  css = """
30
  .gradio-container {
 
115
  # Speech to Text Transcriptions!
116
  This demo uses the OpenAI whisper model which is trained on a large dataset of diverse audio that can perform multilingual speech recognition. The computation time is dependent on the length of the audio.
117
  """)
118
+ # with gr.Tab("YouTube"):
119
+ # audio_input = gr.Textbox(label="YouTube Link", placeholder="paste the youtube link here")
120
+ # text_output = gr.Textbox(label="Transcription", show_label=False)
121
+ # youtube_button = gr.Button("Transcribe")
122
  with gr.Tab("Audio File"):
123
  with gr.Row().style(equal_height=True):
124
  audio_input2 = gr.Audio(label="Audio File", type="filepath")
125
  text_output2 = gr.Textbox(label="Transcription", show_label=False)
126
  file_button = gr.Button("Transcribe")
127
+ # with gr.Tab("Record"):
128
+ # with gr.Row().style(equal_height=True):
129
+ # audio_input3 = gr.Audio(label="Input Audio", source="microphone", type="filepath")
130
+ # text_output3 = gr.Textbox(label="Transcription", show_label=False)
131
+ # rec_button = gr.Button("Transcribe")
132
  gr.HTML('''
133
  <div class="footer">
134
+ <p></a>
135
  </p>
136
  </div>
137
  ''')