camparchimedes committed on
Commit
dda0718
·
verified ·
1 Parent(s): b98f4ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -9
app.py CHANGED
@@ -14,23 +14,28 @@ from fpdf import FPDF
14
  from PIL import Image
15
  import time
16
  import os
 
 
17
 
18
  warnings.filterwarnings("ignore")
19
 
20
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
21
  torch_dtype = torch.float32
22
 
23
- # Initialize the ASR pipeline
24
  pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic", device=device, torch_dtype=torch.float32)
25
 
26
- # Function to convert m4a files to wav
27
  def convert_to_wav(audio_file):
28
  audio = AudioSegment.from_file(audio_file, format="m4a")
29
  wav_file = "temp.wav"
30
  audio.export(wav_file, format="wav")
31
  return wav_file
32
 
33
- # Transcription function using the ASR pipeline
 
 
 
34
  def transcribe_audio(audio_file):
35
  if audio_file.endswith(".m4a"):
36
  audio_file = convert_to_wav(audio_file)
@@ -50,12 +55,13 @@ def transcribe_audio(audio_file):
50
 
51
  return transcription.strip(), result
52
 
53
- # Summarization model setup
54
  summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
55
  summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
 
56
  summarization_model.to(device)
57
 
58
- # Graph-based summarization (TextRank)
59
  def summarize_text(text):
60
  sentences = sent_tokenize(text)
61
  if len(sentences) == 0:
@@ -69,12 +75,20 @@ def summarize_text(text):
69
  scores = nx.pagerank(nx_graph)
70
 
71
  ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
72
-
73
  top_n = 3
74
  summary = " ".join([s for _, s in ranked_sentences[:top_n]])
75
  return summary
76
 
77
- # Save transcription and summary to PDF
 
 
 
 
 
 
 
 
78
  def save_to_pdf(transcription, summary):
79
  pdf = FPDF()
80
  pdf.add_page()
@@ -83,6 +97,7 @@ def save_to_pdf(transcription, summary):
83
  if transcription:
84
  pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
85
 
 
86
  pdf.ln(10)
87
 
88
  if summary:
@@ -92,7 +107,16 @@ def save_to_pdf(transcription, summary):
92
  pdf.output(pdf_output_path)
93
  return pdf_output_path
94
 
95
- # Gradio Interface setup
 
 
 
 
 
 
 
 
 
96
  iface = gr.Blocks()
97
 
98
  with iface:
@@ -171,5 +195,6 @@ with iface:
171
  outputs=[pdf_output_both]
172
  )
173
 
174
- # Run the Gradio interface
 
175
  iface.launch(share=True, debug=True)
 
14
  from PIL import Image
15
  import time
16
  import os
17
+ # import spaces
18
+
19
 
20
  warnings.filterwarnings("ignore")
21
 
22
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
  torch_dtype = torch.float32
24
 
25
+ # ASR pipeline
26
  pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic", device=device, torch_dtype=torch.float32)
27
 
28
+ # Convert m4a files to wav
29
  def convert_to_wav(audio_file):
30
  audio = AudioSegment.from_file(audio_file, format="m4a")
31
  wav_file = "temp.wav"
32
  audio.export(wav_file, format="wav")
33
  return wav_file
34
 
35
+ # @spaces.GPU(queue=True)
36
+
37
+
38
+ # Transcription function using the ASR pipeline
39
  def transcribe_audio(audio_file):
40
  if audio_file.endswith(".m4a"):
41
  audio_file = convert_to_wav(audio_file)
 
55
 
56
  return transcription.strip(), result
57
 
58
+ # t5-base tokenizer and model for the summarization function
59
  summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
60
  summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
61
+ # Move the t5-base model to the selected device
62
  summarization_model.to(device)
63
 
64
+ # Graph-based summarization (TextRank method)
65
  def summarize_text(text):
66
  sentences = sent_tokenize(text)
67
  if len(sentences) == 0:
 
75
  scores = nx.pagerank(nx_graph)
76
 
77
  ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
78
+ # Select top N sentences (e.g., 3 sentences for the summary)
79
  top_n = 3
80
  summary = " ".join([s for _, s in ranked_sentences[:top_n]])
81
  return summary
82
 
83
+
84
+ # HTML syntax for imagery
85
+ image_html = """
86
+ <div style="text-align: center;">
87
+ <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">
88
+ </div>
89
+ """
90
+
91
+ # Save transcription and summary to PDF
92
  def save_to_pdf(transcription, summary):
93
  pdf = FPDF()
94
  pdf.add_page()
 
97
  if transcription:
98
  pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
99
 
100
+ # paragraph space
101
  pdf.ln(10)
102
 
103
  if summary:
 
107
  pdf.output(pdf_output_path)
108
  return pdf_output_path
109
 
110
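+ # Gradio interface setup (NOTE: this gr.Interface is rebound by the `iface = gr.Blocks()` assignment right after it, so it is never used)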
111
+ iface = gr.Interface(
112
+ fn=transcribe_audio,
113
+ inputs=gr.Audio(type="filepath"),
114
+ outputs="text",
115
+ title="Audio Transcription App",
116
+ description="Upload an audio file to get the transcription",
117
+ theme="default",
118
+ live=False
119
+ )
120
  iface = gr.Blocks()
121
 
122
  with iface:
 
195
  outputs=[pdf_output_both]
196
  )
197
 
198
+
199
+ # Run the Gradio interface
200
  iface.launch(share=True, debug=True)