Update app.py
app.py CHANGED
@@ -14,23 +14,28 @@ from fpdf import FPDF
 from PIL import Image
 import time
 import os
+# import spaces
+

 warnings.filterwarnings("ignore")

 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 torch_dtype = torch.float32

-#
+# ASR pipeline
 pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic", device=device, torch_dtype=torch.float32)

-#
+# Switch m4a to wav
 def convert_to_wav(audio_file):
     audio = AudioSegment.from_file(audio_file, format="m4a")
     wav_file = "temp.wav"
     audio.export(wav_file, format="wav")
     return wav_file

-#
+# @spaces.GPU(queue=True)
+
+
+# Transcription funct.@ASR pipeline
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
@@ -50,12 +55,13 @@ def transcribe_audio(audio_file):

     return transcription.strip(), result

-#
+# t5-base model@summary funct.
 summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
 summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
+# t5-base to device
 summarization_model.to(device)

-# Graph-based summarization (TextRank)
+# Graph-based summarization (TextRank, method)
 def summarize_text(text):
     sentences = sent_tokenize(text)
     if len(sentences) == 0:
@@ -69,12 +75,20 @@ def summarize_text(text):
     scores = nx.pagerank(nx_graph)

     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
-
+    # Select top N sentences (e.g., 3 sentences for the summary)
     top_n = 3
     summary = " ".join([s for _, s in ranked_sentences[:top_n]])
     return summary

-
+
+# HTML syntax for imagery
+image_html = """
+<div style="text-align: center;">
+    <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">
+</div>
+"""
+
+# Transcription and summary@PDF option(s)
 def save_to_pdf(transcription, summary):
     pdf = FPDF()
     pdf.add_page()
@@ -83,6 +97,7 @@ def save_to_pdf(transcription, summary):
     if transcription:
         pdf.multi_cell(0, 10, "Transcription:\n" + transcription)

+    # paragraph space
     pdf.ln(10)

     if summary:
@@ -92,7 +107,16 @@ def save_to_pdf(transcription, summary):
     pdf.output(pdf_output_path)
     return pdf_output_path

-# Gradio
+# Gradio
+iface = gr.Interface(
+    fn=transcribe_audio,
+    inputs=gr.Audio(type="filepath"),
+    outputs="text",
+    title="Audio Transcription App",
+    description="Upload an audio file to get the transcription",
+    theme="default",
+    live=False
+)
 iface = gr.Blocks()

 with iface:
@@ -171,5 +195,6 @@ with iface:
         outputs=[pdf_output_both]
     )

-
+
+# run
 iface.launch(share=True, debug=True)
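
Two of the added lines, "# import spaces" and "# @spaces.GPU(queue=True)", stub in ZeroGPU support but leave it disabled. A minimal sketch of what enabling them would look like; note the spaces package is only importable inside a Hugging Face Space, and the queue=True argument is copied verbatim from the commit's comment rather than verified against the spaces API:

# Sketch, assuming a ZeroGPU Space where the spaces package is available.
import spaces

@spaces.GPU(queue=True)  # argument copied from the commit's commented-out line
def transcribe_audio(audio_file):
    ...  # body unchanged from app.py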
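The summarize_text() hunks show the TextRank flow only in part: the sentence-similarity graph built between sent_tokenize and nx.pagerank falls outside the displayed hunks. For orientation, a self-contained sketch of the same graph-based method; the TF-IDF-plus-cosine similarity step is an assumption, not necessarily what app.py uses:

# TextRank sketch; the similarity construction is assumed (those lines are not in the diff).
import networkx as nx
from nltk.tokenize import sent_tokenize  # requires a one-time nltk.download("punkt")
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def textrank_summary(text, top_n=3):
    sentences = sent_tokenize(text)
    if len(sentences) <= top_n:
        return text
    # Edge weights = pairwise cosine similarity between TF-IDF sentence vectors
    sim = cosine_similarity(TfidfVectorizer().fit_transform(sentences))
    scores = nx.pagerank(nx.from_numpy_array(sim))  # same PageRank call as the commit
    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return " ".join(s for _, s in ranked[:top_n])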
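One behavioral note on the Gradio hunk: the newly added iface = gr.Interface(...) is rebound on the very next line by iface = gr.Blocks(), so the Interface app is built but never served; only the Blocks UI assembled under "with iface:" reaches iface.launch(). A hypothetical cleanup, not part of this commit, would keep only the Blocks path:

# Hypothetical cleanup, not in the commit: drop the shadowed gr.Interface.
import gradio as gr

iface = gr.Blocks()
with iface:
    ...  # components and click handlers as in app.py
iface.launch(share=True, debug=True)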