Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -81,28 +81,23 @@ def transcribe_audio(audio_file):
|
|
81 |
if audio_file.endswith(".m4a"):
|
82 |
audio_file = convert_to_wav(audio_file)
|
83 |
|
84 |
-
# Load using torchaudio
|
85 |
-
waveform, sample_rate = torchaudio.load(audio_file)
|
86 |
-
|
87 |
start_time = time.time()
|
88 |
|
|
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
text = pipe(waveform, sampling_rate=sample_rate)["text"]
|
95 |
|
96 |
|
97 |
output_time = time.time() - start_time
|
98 |
|
99 |
-
|
|
|
100 |
audio_duration = waveform.shape[1] / sample_rate
|
101 |
|
102 |
# Find audio duration using the pipeline's internal method
|
103 |
#audio_duration = pipe.feature_extractor.sampling_rate * len(pipe.feature_extractor(audio_file)["input_features"][0]) / pipe.feature_extractor.sampling_rate
|
104 |
|
105 |
-
# Real-time Factor
|
106 |
rtf = output_time / audio_duration
|
107 |
|
108 |
# Format of the result
|
@@ -227,29 +222,21 @@ def save_to_pdf(text, summary):
|
|
227 |
pdf.output(pdf_output_path)
|
228 |
return pdf_output_path
|
229 |
|
230 |
-
def _return_img_html_embed(img_url):
|
231 |
-
HTML_str = (
|
232 |
-
f'<center><img src="{img_url}" alt="Imagerine" style="width:100%; height:auto;"></center>'
|
233 |
-
)
|
234 |
-
return HTML_str
|
235 |
-
|
236 |
-
# Gradio Interface
|
237 |
def display_image():
|
238 |
-
img_url = "https://huggingface.co/spaces/camparchimedes/transcription_app/
|
239 |
-
|
240 |
-
return html_embed_str
|
241 |
|
242 |
iface = gr.Blocks()
|
243 |
|
244 |
with iface:
|
245 |
-
gr.
|
246 |
gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
|
247 |
|
248 |
with gr.Tabs():
|
249 |
with gr.TabItem("Transcription"):
|
250 |
audio_input = gr.Audio(type="filepath")
|
251 |
text_output = gr.Textbox(label="Text")
|
252 |
-
result_output = gr.Textbox(label="
|
253 |
transcribe_button = gr.Button("Transcribe")
|
254 |
|
255 |
transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
|
|
|
81 |
if audio_file.endswith(".m4a"):
|
82 |
audio_file = convert_to_wav(audio_file)
|
83 |
|
|
|
|
|
|
|
84 |
start_time = time.time()
|
85 |
|
86 |
+
# Load using torchaudio
|
87 |
|
88 |
+
text = pipe(audio_file)["text"]
|
|
|
|
|
|
|
|
|
89 |
|
90 |
|
91 |
output_time = time.time() - start_time
|
92 |
|
93 |
+
waveform, sample_rate = torchaudio.load(audio_file)
|
94 |
+
# Audio duration (in seconds)
|
95 |
audio_duration = waveform.shape[1] / sample_rate
|
96 |
|
97 |
# Find audio duration using the pipeline's internal method
|
98 |
#audio_duration = pipe.feature_extractor.sampling_rate * len(pipe.feature_extractor(audio_file)["input_features"][0]) / pipe.feature_extractor.sampling_rate
|
99 |
|
100 |
+
# Real-time Factor (RTF)
|
101 |
rtf = output_time / audio_duration
|
102 |
|
103 |
# Format of the result
|
|
|
222 |
pdf.output(pdf_output_path)
|
223 |
return pdf_output_path
|
224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
def display_image():
|
226 |
+
img_url = "https://huggingface.co/spaces/camparchimedes/transcription_app/raw/main/picture.png"
|
227 |
+
return img_url
|
|
|
228 |
|
229 |
iface = gr.Blocks()
|
230 |
|
231 |
with iface:
|
232 |
+
gr.Image(display_image(), label="Image")
|
233 |
gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
|
234 |
|
235 |
with gr.Tabs():
|
236 |
with gr.TabItem("Transcription"):
|
237 |
audio_input = gr.Audio(type="filepath")
|
238 |
text_output = gr.Textbox(label="Text")
|
239 |
+
result_output = gr.Textbox(label="Transcription Details")
|
240 |
transcribe_button = gr.Button("Transcribe")
|
241 |
|
242 |
transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
|