camparchimedes committed
Commit 8c6ad91 · verified · 1 Parent(s): 38c9a82

Update app.py

Files changed (1)
  1. app.py +81 -268

app.py CHANGED
@@ -1,138 +1,80 @@
  import time
  import os
- import spaces
- import contextlib
  import warnings
- warnings.filterwarnings("ignore")

- from pydub import AudioSegment

- # If m4a audio, convert to wav (Python)
  def convert_to_wav(audio_file):
      audio = AudioSegment.from_file(audio_file, format="m4a")
      wav_file = "temp.wav"
      audio.export(wav_file, format="wav")
      return wav_file

- # path/filename: /mnt/data/switchdev.py
-
- import torch
- from transformers import pipeline, WhisperForConditionalGeneration

- def get_device():
-     # This function defers the device check until it's actually needed
-     return "cuda" if torch.cuda.is_available() else "cpu"
-
- # prepare decoder input IDs for generation
- def prepare_decoder_input_ids_for_generation_patch(self, batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, device):
-     if 'decoder_input_ids' not in model_kwargs:
-         return torch.ones((batch_size, 1), dtype=torch.long) * decoder_start_token_id, model_kwargs
-     else:
-         return model_kwargs.pop('decoder_input_ids'), model_kwargs
-
- # Patch the WhisperForConditionalGeneration class
- WhisperForConditionalGeneration._prepare_decoder_input_ids_for_generation = prepare_decoder_input_ids_for_generation_patch
-
- def create_pipeline():
-     # Only initialize the device when the pipeline is created
-     device = get_device()
-     try:
-         pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-     except RuntimeError as e:
-         if "CUDA error" in str(e):
-             print("CUDA initialization failed. Falling back to CPU.")
-             pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device="cpu")
-         else:
-             raise e
-
-     # pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-     return pipe

  def transcribe_audio(audio_file):
      if audio_file.endswith(".m4a"):
          audio_file = convert_to_wav(audio_file)
-
-     pipe = create_pipeline()  # Initialize the pipeline here

      start_time = time.time()
-
-     # transcribe
-     output = pipe(audio_file)
-
-     # get text
      text = output["text"]
-     end_time = time.time()
-     output_time = end_time - start_time
-     word_count = len(text.split())
-
-     # summary
-     result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"

      return text, result

- import nltk
- from nltk.tokenize import word_tokenize, sent_tokenize
- from nltk.corpus import stopwords
- import networkx as nx
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
- import pandas as pd
- import numpy as np
- import re
-
- nltk.download('punkt')
- nltk.download('stopwords')
-
- WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
-
  def clean_text(text):
-     text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
-     text = re.sub(r'\<a href', ' ', str(text))
-     text = re.sub(r'&amp;', '', str(text))
-     text = re.sub(r'\(s+', '(', str(text))
-     text = re.sub(r's+\)', ')', str(text))
-     text = re.sub(r'\(\)', '', str(text))
-     text = re.sub(r'\s+', ' ', str(text))
-     text = re.sub(r'[_"\-;%|+&=*%!?:#$@\[\]]', ' ', str(text))
-     text = re.sub(r'<br />', ' ', str(text))
-     text = re.sub(r'\'', '', str(text))
-     text = re.sub(r'«', '', str(text))
-     text = re.sub(r'»', '', str(text))
-     text = re.sub(r'–', '-', str(text))
-     text = re.sub(r'…', '.', str(text))
-     text = re.sub(r'[^\x00-\x7F]+', ' ', str(text))
      return text

  def preprocess_text(text):
-     try:
-         words = word_tokenize(text)
-         stop_words = set(stopwords.words('norwegian'))
-         words_without_stopwords = [word for word in words if word.lower() not in stop_words]
-         processed_text = ' '.join(words_without_stopwords)
-         return processed_text
-     except Exception as e:
-         st.error(f"Error during text preprocessing: {e}")
-         return None
-
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16)
- summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
- summarization_model.to(device)

  def summarize_text(text):
      preprocessed_text = preprocess_text(text)
-     if preprocessed_text is None:
-         return None
-     inputs = summarization_tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)
      inputs = inputs.to(device)
      summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
-     summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-     return summary

  def build_similarity_matrix(sentences, stop_words):
      similarity_matrix = nx.Graph()
      for i, tokens_a in enumerate(sentences):
@@ -142,10 +84,12 @@ def build_similarity_matrix(sentences, stop_words):
              similarity_matrix.add_edge(i, j, weight=len(common_words))
      return similarity_matrix

  def graph_based_summary(text, num_paragraphs=3):
-     sentences = text.strip().split(".")
      if len(sentences) < num_paragraphs:
          return sentences

      sentence_tokens = [word_tokenize(sent) for sent in sentences]
      stop_words = set(stopwords.words('norwegian'))
      filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
@@ -153,53 +97,27 @@ def graph_based_summary(text, num_paragraphs=3):

      scores = nx.pagerank(similarity_matrix)
      ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
-     summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
-     return summary

  def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
      sentences = nltk.sent_tokenize(text)
      if len(sentences) < num_paragraphs:
          return sentences

      stop_words = set(stopwords.words('norwegian'))
      vectorizer = TfidfVectorizer(stop_words=list(stop_words))
      X = vectorizer.fit_transform(sentences)
      similarity_matrix = cosine_similarity(X, X)

-     # pipe it
-     pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-
-     def transcribe_audio(audio_file):
-
-         if audio_file.endswith(".m4a"):
-             audio_file = convert_to_wav(audio_file)
-
-         start_time = time.time()
-
-         # transcribe
-         output = pipe(audio_file)
-
-         # get text
-         text = output["text"]
-         end_time = time.time()
-         output_time = end_time - start_time
-         word_count = len(text.split())
-
-         # summary
-         result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
-
-         return text, result
-     for i in range(len(similarity_matrix)):  # threshold
-         for j in range(len(similarity_matrix[i])):
-             if similarity_matrix[i][j] < threshold:
-                 similarity_matrix[i][j] = 0.0
-
      nx_graph = nx.from_numpy_array(similarity_matrix)
      scores = nx.pagerank(nx_graph)
      ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
-     summary = [ranked_sentences[i][1] for i in range(num_paragraphs)]
-     return summary
-
@@ -210,57 +128,21 @@ def text_rank_summary(text, num_paragraphs=3):
      X = vectorizer.fit_transform(sentences)
      similarity_matrix = cosine_similarity(X, X)

-     nx_graph = nx.from_numpy_array(similarity_matrix)  # graph, nodes (i.e sentences) & edges are similarity scores (is cool)
-     scores = nx.pagerank(nx_graph)  # PageRank algorithm, scoring sentences
-     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)  # rank by PageRank scores
-
-     summary = [ranked_sentences[i][1] for i in range(num_paragraphs)]  # top sentences for summary
-     return ' '.join(summary)
-
- banner_html = """
- <div style="text-align: center;">
-     <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/lol.webp" alt="" width="100%" height="auto">
- </div>
- """
- # https://raw.huggingface.co/spaces/camparchimedes/transcription_app/blob/main/banner_trans.png

- import gradio as gr
- from fpdf import FPDF
- from PIL import Image

  def save_to_pdf(text, summary):
      pdf = FPDF()
-     pdf.add_pag# pipe it
-     pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-
-     def transcribe_audio(audio_file):
-
-         if audio_file.endswith(".m4a"):
-             audio_file = convert_to_wav(audio_file)
-
-         start_time = time.time()
-
-         # transcribe
-         output = pipe(audio_file)
-
-         # get text
-         text = output["text"]
-         end_time = time.time()
-         output_time = end_time - start_time
-         word_count = len(text.split())
-
-         # summary
-         result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
-
-         return text, resulte()
      pdf.set_font("Arial", size=12)

      if text:
-         pdf.multi_cell(0, 10, "text:\n" + text)

-     # paragraph space
-     pdf.ln(10)

      if summary:
          pdf.multi_cell(0, 10, "Summary:\n" + summary)
@@ -269,122 +151,53 @@ def transcribe_audio(audio_file):
      pdf.output(pdf_output_path)
      return pdf_output_path

-
- iface = gr.Interface(
-     fn=transcribe_audio,
-     inputs=gr.Audio(type="filepath"),
-     outputs=gr.Textbox(label="Transcription"),
-     title="SW Transcription App",
-     description="Upload an audio file to get the text",
-     theme="default",
-     live=False
- )
-
  iface = gr.Blocks()

  with iface:
-     gr.HTML(banner_html)
      gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")

      with gr.Tabs():
-
          with gr.TabItem("Transcription"):
              audio_input = gr.Audio(type="filepath")
-             text_output = gr.Textbox(label="text")
              result_output = gr.Textbox(label="Time taken and Number of words")
              transcribe_button = gr.Button("Transcribe")

-             transcribe_button.click(
-                 fn=transcribe_audio,
-                 inputs=[audio_input],
-                 outputs=[text_output, result_output]
-             )

-         with gr.TabItem("Summary_t1"):
              summary_output = gr.Textbox(label="Summary | Graph-based")
              summarize_button = gr.Button("Summarize")

-             def summarize(text):
-                 if not text:
-                     return "Warning: a text must be available."
-                 summary = graph_based_summary(text)
-                 return summary
-
-             summarize_button.click(
-                 fn=summarize,
-                 inputs=[text_output],
-                 outputs=summary_output
-             )

-         with gr.TabItem("LexRank"):
              summary_output = gr.Textbox(label="Summary | LexRank")
              summarize_button = gr.Button("Summarize")

-             def summarize(text):
-                 if not text:
-                     return "Warning: a text must be available."
-                 summary = lex_rank_summary(text)
-                 return summary
-
-             summarize_button.click(
-                 fn=summarize,
-                 inputs=[text_output],
-                 outputs=summary_output
-             )

-         with gr.TabItem("TextRank"):
              summary_output = gr.Textbox(label="Summary | TextRank")
              summarize_button = gr.Button("Summarize")

-             def summarize(text):
-                 if not text:
-                     return "Warning: a text must be available."
-                 summary = text_rank_summary(text)
-                 return summary
-
-             summarize_button.click(
-                 fn=summarize,
-                 inputs=[text_output],
-                 outputs=summary_output
-             )

          with gr.TabItem("Download PDF"):
-             pdf_text_only = gr.Button("Download PDF with text Only")
              pdf_summary_only = gr.Button("Download PDF with Summary Only")
              pdf_both = gr.Button("Download PDF with Both")

-             pdf_output_text_only = gr.File(label="Download PDF")
-             pdf_output_summary_only = gr.File(label="Download PDF")
-             pdf_output_both = gr.File(label="Download PDF")
-
-             def generate_pdf_text_only(text):
-                 return save_to_pdf(text, "")
-
-             def generate_pdf_summary_only(summary):
-                 return save_to_pdf("", summary)
-
-             def generate_pdf_both(text, summary):
-                 return save_to_pdf(text, summary)
-
-             pdf_text_only.click(
-                 fn=generate_pdf_text_only,
-                 inputs=[text_output],
-                 outputs=[pdf_output_text_only]
-             )
-
-             pdf_summary_only.click(
-                 fn=generate_pdf_summary_only,
-                 inputs=[summary_output],
-                 outputs=[pdf_output_summary_only]
-             )

-             pdf_both.click(
-                 fn=generate_pdf_both,
-                 inputs=[text_output, summary_output],
-                 outputs=[pdf_output_both]
-             )

  iface.launch(share=True, debug=True)
 
@@ -1,138 +1,80 @@
  import time
  import os
  import warnings
+ from pydub import AudioSegment
+ import torch
+ from transformers import pipeline, WhisperForConditionalGeneration
+ import nltk
+ from nltk.tokenize import word_tokenize
+ from nltk.corpus import stopwords
+ import networkx as nx
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import pandas as pd
+ import numpy as np
+ import re
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import gradio as gr
+ from fpdf import FPDF
+ from PIL import Image

+ # Suppress warnings
+ warnings.filterwarnings("ignore")

+ # NLTK dependencies
+ nltk.download('punkt', quiet=True)
+ nltk.download('stopwords', quiet=True)

+ # Convert m4a audio to wav format
  def convert_to_wav(audio_file):
      audio = AudioSegment.from_file(audio_file, format="m4a")
      wav_file = "temp.wav"
      audio.export(wav_file, format="wav")
      return wav_file

+ # Initialize device for torch
+ device = "cuda" if torch.cuda.is_available() else "cpu"

+ # Load Whisper model and tokenizer
+ whisper_pipeline = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
+ summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16).to(device)
+ summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")

+ # Transcribe audio to text
  def transcribe_audio(audio_file):
      if audio_file.endswith(".m4a"):
          audio_file = convert_to_wav(audio_file)

      start_time = time.time()
+     output = whisper_pipeline(audio_file)
      text = output["text"]
+     output_time = time.time() - start_time

+     result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
      return text, result

+ # Clean and preprocess text for summarization
  def clean_text(text):
+     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
+     text = re.sub(r'[^\w\s]', '', text)
+     text = re.sub(r'\s+', ' ', text).strip()
      return text

  def preprocess_text(text):
+     words = word_tokenize(text)
+     stop_words = set(stopwords.words('norwegian'))
+     words = [word for word in words if word.lower() not in stop_words]
+     return ' '.join(words)

+ # Summarize text using the T5 model
  def summarize_text(text):
      preprocessed_text = preprocess_text(text)
+     inputs = summarization_tokenizer(preprocessed_text, max_length=1024, return_tensors="pt", truncation=True)
      inputs = inputs.to(device)
      summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
+     return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

+ # Build similarity matrix for graph-based summary
  def build_similarity_matrix(sentences, stop_words):
      similarity_matrix = nx.Graph()
      for i, tokens_a in enumerate(sentences):
@@ -142,10 +84,12 @@ def build_similarity_matrix(sentences, stop_words):
              similarity_matrix.add_edge(i, j, weight=len(common_words))
      return similarity_matrix

+ # Graph-based summarization
  def graph_based_summary(text, num_paragraphs=3):
+     sentences = nltk.sent_tokenize(text)
      if len(sentences) < num_paragraphs:
          return sentences
+
      sentence_tokens = [word_tokenize(sent) for sent in sentences]
      stop_words = set(stopwords.words('norwegian'))
      filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
@@ -153,53 +97,27 @@ def graph_based_summary(text, num_paragraphs=3):

      scores = nx.pagerank(similarity_matrix)
      ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
+     return ' '.join([sent for _, sent in ranked_sentences[:num_paragraphs]])

+ # LexRank summarization
  def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
      sentences = nltk.sent_tokenize(text)
      if len(sentences) < num_paragraphs:
          return sentences
+
      stop_words = set(stopwords.words('norwegian'))
      vectorizer = TfidfVectorizer(stop_words=list(stop_words))
      X = vectorizer.fit_transform(sentences)
      similarity_matrix = cosine_similarity(X, X)

+     # Apply threshold to the similarity matrix
+     similarity_matrix[similarity_matrix < threshold] = 0
      nx_graph = nx.from_numpy_array(similarity_matrix)
      scores = nx.pagerank(nx_graph)
      ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
+     return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])

+ # TextRank summarization
  def text_rank_summary(text, num_paragraphs=3):
      sentences = nltk.sent_tokenize(text)
      if len(sentences) < num_paragraphs:
@@ -210,57 +128,21 @@ def text_rank_summary(text, num_paragraphs=3):
      X = vectorizer.fit_transform(sentences)
      similarity_matrix = cosine_similarity(X, X)

+     nx_graph = nx.from_numpy_array(similarity_matrix)
+     scores = nx.pagerank(nx_graph)
+     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
+     return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])

+ # Save text and summary to PDF
  def save_to_pdf(text, summary):
      pdf = FPDF()
+     pdf.add_page()
      pdf.set_font("Arial", size=12)

      if text:
+         pdf.multi_cell(0, 10, "Text:\n" + text)

+     pdf.ln(10)  # Paragraph space

      if summary:
          pdf.multi_cell(0, 10, "Summary:\n" + summary)
@@ -269,122 +151,53 @@ def transcribe_audio(audio_file):
      pdf.output(pdf_output_path)
      return pdf_output_path

+ # Gradio Interface
  iface = gr.Blocks()

  with iface:
+     gr.HTML("""
+     <div style="text-align: center;">
+         <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/lol.webp" alt="" width="100%" height="auto">
+     </div>
+     """)
      gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")

      with gr.Tabs():
          with gr.TabItem("Transcription"):
              audio_input = gr.Audio(type="filepath")
+             text_output = gr.Textbox(label="Text")
              result_output = gr.Textbox(label="Time taken and Number of words")
              transcribe_button = gr.Button("Transcribe")

+             transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])

+         with gr.TabItem("Summary | Graph-based"):
              summary_output = gr.Textbox(label="Summary | Graph-based")
              summarize_button = gr.Button("Summarize")

+             summarize_button.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output])

+         with gr.TabItem("Summary | LexRank"):
              summary_output = gr.Textbox(label="Summary | LexRank")
              summarize_button = gr.Button("Summarize")

+             summarize_button.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output])

+         with gr.TabItem("Summary | TextRank"):
              summary_output = gr.Textbox(label="Summary | TextRank")
              summarize_button = gr.Button("Summarize")

+             summarize_button.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output])

          with gr.TabItem("Download PDF"):
+             pdf_text_only = gr.Button("Download PDF with Text Only")
              pdf_summary_only = gr.Button("Download PDF with Summary Only")
              pdf_both = gr.Button("Download PDF with Both")

+             pdf_output = gr.File(label="Download PDF")

+             pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
+             pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output], outputs=[pdf_output])
+             pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])

  iface.launch(share=True, debug=True)
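
For context: the new lex_rank_summary replaces the old per-cell threshold loops with a single vectorized NumPy mask before running PageRank. A minimal standalone sketch of that same flow, using the same scikit-learn/NetworkX calls as app.py (the example sentences here are hypothetical, for illustration only):

import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def lex_rank(sentences, num_sentences=2, threshold=0.1):
    X = TfidfVectorizer().fit_transform(sentences)   # TF-IDF vector per sentence
    sim = cosine_similarity(X, X)                    # dense pairwise similarity matrix
    sim[sim < threshold] = 0                         # vectorized threshold, as in lex_rank_summary
    scores = nx.pagerank(nx.from_numpy_array(sim))   # PageRank over the similarity graph
    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return ' '.join(s for _, s in ranked[:num_sentences])

# Hypothetical input, for illustration only
print(lex_rank([
    "Whisper transcribes the uploaded audio file to text.",
    "The transcription can then be summarized.",
    "Whisper turns audio into text quickly.",
]))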