camparchimedes commited on
Commit
0ac786e
·
verified ·
1 Parent(s): 2920f00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -51
app.py CHANGED
@@ -1,4 +1,11 @@
1
 
 
 
 
 
 
 
 
2
  import time
3
  import os
4
  import spaces
@@ -19,26 +26,28 @@ from transformers import pipeline # AutoProcessor, AutoModelForSpeechSeq2Seq
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
20
  torch_dtype = torch.float32
21
 
22
- asr = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
23
 
24
- @spaces.GPU(queue=True)
25
- def transcribe_audio(audio_file):
26
  if audio_file.endswith(".m4a"):
27
  audio_file = convert_to_wav(audio_file)
28
 
29
  start_time = time.time()
30
-
 
 
31
  with torch.no_grad():
32
- output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8}) # "task": "transcribe", "language": "no"
33
 
34
- transcription = output["text"]
35
  end_time = time.time()
36
  output_time = end_time - start_time
37
- word_count = len(transcription.split())
38
 
39
  result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
40
 
41
- return transcription.strip(), result
42
 
43
  # [VERSION 3: full-on w/ 3 styles for summarization]
44
  import nltk
@@ -56,11 +65,6 @@ nltk.download('stopwords')
56
 
57
  WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
58
 
59
- def transcribe(audio_file):
60
- transcription, result = transcribe_audio(audio_file)
61
- text = transcription
62
- return text, result
63
-
64
  def clean_text(text):
65
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
66
  text = re.sub(r'\<a href', ' ', str(text))
@@ -96,7 +100,7 @@ summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", return_di
96
  summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
97
  summarization_model.to(device)
98
 
99
- @spaces.GPU(queue=True)
100
  def summarize_text(text):
101
  preprocessed_text = preprocess_text(text)
102
  if preprocessed_text is None:
@@ -174,13 +178,13 @@ import gradio as gr
174
  from fpdf import FPDF
175
  from PIL import Image
176
 
177
- def save_to_pdf(transcription, summary):
178
  pdf = FPDF()
179
  pdf.add_page()
180
  pdf.set_font("Arial", size=12)
181
 
182
- if transcription:
183
- pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
184
 
185
  # paragraph space
186
  pdf.ln(10)
@@ -194,16 +198,16 @@ def save_to_pdf(transcription, summary):
194
 
195
  banner_html = """
196
  <div style="text-align: center;">
197
- <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/raw/main/picture.png" alt="Banner" width="100%" height="auto">
198
  </div>
199
  """
200
 
201
  iface = gr.Interface(
202
  fn=transcribe_audio,
203
  inputs=gr.Audio(type="filepath"),
204
- outputs="text",
205
  title="SW Transcription App",
206
- description="Upload an audio file to get the transcription",
207
  theme="default",
208
  live=False
209
  )
@@ -218,17 +222,18 @@ with iface:
218
 
219
  with gr.TabItem("Transcription"):
220
  audio_input = gr.Audio(type="filepath")
221
- transcription_output = gr.Textbox(label="Transcription")
222
  result_output = gr.Textbox(label="Time taken and Number of words")
223
  transcribe_button = gr.Button("Transcribe")
224
 
225
  def transcribe(audio_file):
226
- transcription, result = transcribe_audio(audio_file)
227
- return transcription, result
 
228
  transcribe_button.click(
229
  fn=transcribe,
230
  inputs=[audio_input],
231
- outputs=[transcription_output, result_output]
232
  )
233
 
234
 
@@ -236,15 +241,15 @@ with iface:
236
  summary_output = gr.Textbox(label="Summary | Graph-based")
237
  summarize_button = gr.Button("Summarize")
238
 
239
- def summarize(transcription):
240
- if not transcription:
241
- return "Warning: a transcription must be available."
242
- summary = graph_based_summary(transcription)
243
  return summary
244
 
245
  summarize_button.click(
246
  fn=summarize,
247
- inputs=[transcription_output],
248
  outputs=summary_output
249
  )
250
 
@@ -252,15 +257,15 @@ with iface:
252
  summary_output = gr.Textbox(label="Summary | LexRank")
253
  summarize_button = gr.Button("Summarize")
254
 
255
- def summarize(transcription):
256
- if not transcription:
257
- return "Warning: a transcription must be available."
258
- summary = lex_rank_summary(transcription)
259
  return summary
260
 
261
  summarize_button.click(
262
  fn=summarize,
263
- inputs=[transcription_output],
264
  outputs=summary_output
265
  )
266
 
@@ -268,40 +273,40 @@ with iface:
268
  summary_output = gr.Textbox(label="Summary | TextRank")
269
  summarize_button = gr.Button("Summarize")
270
 
271
- def summarize(transcription):
272
- if not transcription:
273
- return "Warning: a transcription must be available."
274
- summary = text_rank_summary(transcription)
275
  return summary
276
 
277
  summarize_button.click(
278
  fn=summarize,
279
- inputs=[transcription_output],
280
  outputs=summary_output
281
  )
282
 
283
  with gr.TabItem("Download PDF"):
284
- pdf_transcription_only = gr.Button("Download PDF with Transcription Only")
285
  pdf_summary_only = gr.Button("Download PDF with Summary Only")
286
  pdf_both = gr.Button("Download PDF with Both")
287
 
288
- pdf_output_transcription_only = gr.File(label="Download PDF")
289
  pdf_output_summary_only = gr.File(label="Download PDF")
290
  pdf_output_both = gr.File(label="Download PDF")
291
 
292
- def generate_pdf_transcription_only(transcription):
293
- return save_to_pdf(transcription, "")
294
 
295
  def generate_pdf_summary_only(summary):
296
  return save_to_pdf("", summary)
297
 
298
- def generate_pdf_both(transcription, summary):
299
- return save_to_pdf(transcription, summary)
300
 
301
- pdf_transcription_only.click(
302
- fn=generate_pdf_transcription_only,
303
- inputs=[transcription_output],
304
- outputs=[pdf_output_transcription_only]
305
  )
306
 
307
  pdf_summary_only.click(
@@ -312,9 +317,8 @@ with iface:
312
 
313
  pdf_both.click(
314
  fn=generate_pdf_both,
315
- inputs=[transcription_output, summary_output],
316
  outputs=[pdf_output_both]
317
  )
318
 
319
  iface.launch(share=True, debug=True)
320
-
 
1
 
2
+
3
+ # -----------------COPY OF NEW EDITION[app.py]-----------------
4
+
5
+ # TODO: check whether the warning quoted below is still emitted:
6
+ # "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results."
7
+
8
+
9
  import time
10
  import os
11
  import spaces
 
26
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
27
  torch_dtype = torch.float32
28
 
29
+ pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
30
 
31
+ # @spaces.GPU(queue=True)
32
def transcribe_audio(audio_file, forced_decoder_ids=None):
    """Transcribe an audio file with the module-level ASR pipeline ``pipe``.

    Args:
        audio_file: Path to the audio file. A path ending in ``.m4a`` is
            first passed through ``convert_to_wav``.
        forced_decoder_ids: Optional decoder prompt ids. When supplied they
            are forwarded to generation as-is; when omitted the language is
            requested by name instead. The parameter is optional so that
            existing one-argument callers keep working.

    Returns:
        A ``(text, result)`` tuple: the stripped transcription, and a short
        report containing the elapsed seconds and the word count.
    """
    if not audio_file:
        # Presumably the UI hands over None when nothing was uploaded;
        # bail out instead of failing on ``None.endswith``.
        return "", "No audio file provided."

    if audio_file.endswith(".m4a"):
        audio_file = convert_to_wav(audio_file)

    # Send either explicit prompt ids or a language name, never both:
    # transformers reports a conflict (and ignores the ids) when they are
    # combined with the language/task shortcuts.
    generate_kwargs = {"num_beams": 8}
    if forced_decoder_ids is not None:
        generate_kwargs["forced_decoder_ids"] = forced_decoder_ids
    else:
        generate_kwargs["language"] = "norwegian"

    start_time = time.time()
    with torch.no_grad():
        output = pipe(
            audio_file,
            chunk_length_s=30,
            generate_kwargs=generate_kwargs,
        )
    end_time = time.time()

    text = output["text"].strip()
    output_time = end_time - start_time
    word_count = len(text.split())

    result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"

    return text, result
51
 
52
  # [VERSION 3: full-on w/ 3 styles for summarization]
53
  import nltk
 
65
 
66
# Collapse every run of whitespace (spaces, tabs, newlines) into a single
# space after trimming both ends. The pattern is a raw string so that ``\s``
# is not parsed as an (invalid) string escape.
WHITESPACE_HANDLER = lambda k: re.sub(r'\s+', ' ', k.strip())
67
 
 
 
 
 
 
68
  def clean_text(text):
69
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
70
  text = re.sub(r'\<a href', ' ', str(text))
 
100
  summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
101
  summarization_model.to(device)
102
 
103
+ # @spaces.GPU(queue=True)
104
  def summarize_text(text):
105
  preprocessed_text = preprocess_text(text)
106
  if preprocessed_text is None:
 
178
  from fpdf import FPDF
179
  from PIL import Image
180
 
181
+ def save_to_pdf(text, summary):
182
  pdf = FPDF()
183
  pdf.add_page()
184
  pdf.set_font("Arial", size=12)
185
 
186
+ if text:
187
+ pdf.multi_cell(0, 10, "text:\n" + text)
188
 
189
  # paragraph space
190
  pdf.ln(10)
 
198
 
199
  banner_html = """
200
  <div style="text-align: center;">
201
+ <img src="https://huggingface.co/spaces/camparchimedes/text_app/raw/main/picture.png" alt="Banner" width="100%" height="auto">
202
  </div>
203
  """
204
 
205
# Top-level Gradio interface wrapping ``transcribe_audio``.
# ``transcribe_audio`` returns two values (the transcription and a timing /
# word-count report), so two text outputs are declared. "text" is the Gradio
# shortcut for a Textbox; "transcription" is not a recognised component name.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text"],
    title="SW Transcription App",
    description="Upload an audio file to get the text",
    theme="default",
    live=False,
)
 
222
 
223
  with gr.TabItem("Transcription"):
224
  audio_input = gr.Audio(type="filepath")
225
+ text_output = gr.Textbox(label="text")
226
  result_output = gr.Textbox(label="Time taken and Number of words")
227
  transcribe_button = gr.Button("Transcribe")
228
 
229
  def transcribe(audio_file):
230
+ text, result = transcribe_audio(audio_file)
231
+ return text, result
232
+
233
  transcribe_button.click(
234
  fn=transcribe,
235
  inputs=[audio_input],
236
+ outputs=[text_output, result_output]
237
  )
238
 
239
 
 
241
  summary_output = gr.Textbox(label="Summary | Graph-based")
242
  summarize_button = gr.Button("Summarize")
243
 
244
+ def summarize(text):
245
+ if not text:
246
+ return "Warning: a text must be available."
247
+ summary = graph_based_summary(text)
248
  return summary
249
 
250
  summarize_button.click(
251
  fn=summarize,
252
+ inputs=[text_output],
253
  outputs=summary_output
254
  )
255
 
 
257
  summary_output = gr.Textbox(label="Summary | LexRank")
258
  summarize_button = gr.Button("Summarize")
259
 
260
+ def summarize(text):
261
+ if not text:
262
+ return "Warning: a text must be available."
263
+ summary = lex_rank_summary(text)
264
  return summary
265
 
266
  summarize_button.click(
267
  fn=summarize,
268
+ inputs=[text_output],
269
  outputs=summary_output
270
  )
271
 
 
273
  summary_output = gr.Textbox(label="Summary | TextRank")
274
  summarize_button = gr.Button("Summarize")
275
 
276
+ def summarize(text):
277
+ if not text:
278
+ return "Warning: a text must be available."
279
+ summary = text_rank_summary(text)
280
  return summary
281
 
282
  summarize_button.click(
283
  fn=summarize,
284
+ inputs=[text_output],
285
  outputs=summary_output
286
  )
287
 
288
  with gr.TabItem("Download PDF"):
289
+ pdf_text_only = gr.Button("Download PDF with text Only")
290
  pdf_summary_only = gr.Button("Download PDF with Summary Only")
291
  pdf_both = gr.Button("Download PDF with Both")
292
 
293
+ pdf_output_text_only = gr.File(label="Download PDF")
294
  pdf_output_summary_only = gr.File(label="Download PDF")
295
  pdf_output_both = gr.File(label="Download PDF")
296
 
297
+ def generate_pdf_text_only(text):
298
+ return save_to_pdf(text, "")
299
 
300
  def generate_pdf_summary_only(summary):
301
  return save_to_pdf("", summary)
302
 
303
+ def generate_pdf_both(text, summary):
304
+ return save_to_pdf(text, summary)
305
 
306
+ pdf_text_only.click(
307
+ fn=generate_pdf_text_only,
308
+ inputs=[text_output],
309
+ outputs=[pdf_output_text_only]
310
  )
311
 
312
  pdf_summary_only.click(
 
317
 
318
  pdf_both.click(
319
  fn=generate_pdf_both,
320
+ inputs=[text_output, summary_output],
321
  outputs=[pdf_output_both]
322
  )
323
 
324
  iface.launch(share=True, debug=True)