camparchimedes committed on
Commit
52a656c
·
verified ·
1 Parent(s): 130c2db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -22
app.py CHANGED
@@ -81,28 +81,23 @@ def transcribe_audio(audio_file):
81
  if audio_file.endswith(".m4a"):
82
  audio_file = convert_to_wav(audio_file)
83
 
84
- # Load using torchaudio
85
- waveform, sample_rate = torchaudio.load(audio_file)
86
-
87
  start_time = time.time()
88
 
 
89
 
90
-
91
-
92
-
93
-
94
- text = pipe(waveform, sampling_rate=sample_rate)["text"]
95
 
96
 
97
  output_time = time.time() - start_time
98
 
99
- # Calculate audio duration (in seconds)
 
100
  audio_duration = waveform.shape[1] / sample_rate
101
 
102
  # Find audio duration@pipeline's internal method
103
  #audio_duration = pipe.feature_extractor.sampling_rate * len(pipe.feature_extractor(audio_file)["input_features"][0]) / pipe.feature_extractor.sampling_rate
104
 
105
- # Real-time Factor calculation
106
  rtf = output_time / audio_duration
107
 
108
  # Format of the result
@@ -227,29 +222,21 @@ def save_to_pdf(text, summary):
227
  pdf.output(pdf_output_path)
228
  return pdf_output_path
229
 
230
- def _return_img_html_embed(img_url):
231
- HTML_str = (
232
- f'<center><img src="{img_url}" alt="Imagerine" style="width:100%; height:auto;"></center>'
233
- )
234
- return HTML_str
235
-
236
- # Gradio Interface
237
  def display_image():
238
- img_url = "https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/picture.png"
239
- html_embed_str = _return_img_html_embed(img_url)
240
- return html_embed_str
241
 
242
  iface = gr.Blocks()
243
 
244
  with iface:
245
- gr.HTML(display_image())
246
  gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
247
 
248
  with gr.Tabs():
249
  with gr.TabItem("Transcription"):
250
  audio_input = gr.Audio(type="filepath")
251
  text_output = gr.Textbox(label="Text")
252
- result_output = gr.Textbox(label="Time taken and Number of words")
253
  transcribe_button = gr.Button("Transcribe")
254
 
255
  transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
 
81
  if audio_file.endswith(".m4a"):
82
  audio_file = convert_to_wav(audio_file)
83
 
 
 
 
84
  start_time = time.time()
85
 
86
+ # Load using torchaudio
87
 
88
+ text = pipe(audio_file)["text"]
 
 
 
 
89
 
90
 
91
  output_time = time.time() - start_time
92
 
93
+ waveform, sample_rate = torchaudio.load(audio_file)
94
+ # Audio duration (in seconds)
95
  audio_duration = waveform.shape[1] / sample_rate
96
 
97
  # Find audio duration@pipeline's internal method
98
  #audio_duration = pipe.feature_extractor.sampling_rate * len(pipe.feature_extractor(audio_file)["input_features"][0]) / pipe.feature_extractor.sampling_rate
99
 
100
+ # Real-time Factor (RTF)
101
  rtf = output_time / audio_duration
102
 
103
  # Format of the result
 
222
  pdf.output(pdf_output_path)
223
  return pdf_output_path
224
 
 
 
 
 
 
 
 
225
  def display_image():
226
+ img_url = "https://huggingface.co/spaces/camparchimedes/transcription_app/raw/main/picture.png"
227
+ return img_url
 
228
 
229
  iface = gr.Blocks()
230
 
231
  with iface:
232
+ gr.Image(display_image(), label="Image")
233
  gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
234
 
235
  with gr.Tabs():
236
  with gr.TabItem("Transcription"):
237
  audio_input = gr.Audio(type="filepath")
238
  text_output = gr.Textbox(label="Text")
239
+ result_output = gr.Textbox(label="Transcription Details")
240
  transcribe_button = gr.Button("Transcribe")
241
 
242
  transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])