camparchimedes committed
Commit b6f831c · verified · 1 Parent(s): 1376856

Update app.py

Files changed (1): app.py (+85 -28)
app.py CHANGED
@@ -54,7 +54,12 @@ SIDEBAR_INFO = f"""
54
  <img src="{LOGO}" style="width: 100%; height: auto;"/>
55
  </div>
56
  """
57
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
58
 
59
  def convert_to_wav(filepath):
60
  _,file_ending = os.path.splitext(f'{filepath}')
@@ -134,12 +139,18 @@ def build_similarity_matrix(sentences, stop_words):
134
  similarity_matrix.add_edge(i, j, weight=len(common_words))
135
  return similarity_matrix
136
 
137
- # "Graph-based summarization" =====>
138
  def graph_based_summary(text, num_paragraphs=3):
 
 
 
 
 
 
139
  doc = nlp(text)
140
  sentences = [sent.text for sent in doc.sents]
141
  if len(sentences) < num_paragraphs:
142
- return sentences
143
 
144
  sentence_tokens = [nlp(sent) for sent in sentences]
145
  stop_words = spacy_stop_words
@@ -152,10 +163,16 @@ def graph_based_summary(text, num_paragraphs=3):
152
 
153
  # LexRank
154
  def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
 
 
 
 
 
 
155
  doc = nlp(text)
156
  sentences = [sent.text for sent in doc.sents]
157
  if len(sentences) < num_paragraphs:
158
- return sentences
159
 
160
  stop_words = spacy_stop_words
161
  vectorizer = TfidfVectorizer(stop_words=list(stop_words))
@@ -171,10 +188,18 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
171
 
172
  # TextRank
173
  def text_rank_summary(text, num_paragraphs=3):
 
 
 
 
 
 
 
 
174
  doc = nlp(text)
175
  sentences = [sent.text for sent in doc.sents]
176
  if len(sentences) < num_paragraphs:
177
- return sentences
178
 
179
  stop_words = spacy_stop_words
180
  vectorizer = TfidfVectorizer(stop_words=list(stop_words))
@@ -213,31 +238,60 @@ with iface:
213
  gr.Markdown(HEADER_INFO)
214
 
215
  with gr.Tabs():
216
- with gr.TabItem("Transcription"):
217
- audio_input = gr.Audio(type="filepath")
218
- text_output = gr.Textbox(label="Text")
219
- result_output = gr.Textbox(label="Transcription Details")
220
- transcribe_button = gr.Button("Transcribe")
221
-
222
- transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
223
 
224
- with gr.TabItem("Summary | Graph-based"):
225
- summary_output = gr.Textbox(label="Summary | Graph-based")
226
- summarize_button = gr.Button("Summarize")
227
-
228
- summarize_button.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output])
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  with gr.TabItem("Summary | LexRank"):
231
- summary_output = gr.Textbox(label="Summary | LexRank")
232
- summarize_button = gr.Button("Summarize")
233
-
234
- summarize_button.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output])
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  with gr.TabItem("Summary | TextRank"):
237
- summary_output = gr.Textbox(label="Summary | TextRank")
238
- summarize_button = gr.Button("Summarize")
239
-
240
- summarize_button.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output])
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  with gr.TabItem("Download PDF"):
243
  pdf_text_only = gr.Button("Download PDF with Text Only")
@@ -247,7 +301,10 @@ with iface:
247
  pdf_output = gr.File(label="Download PDF")
248
 
249
  pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
250
- pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output], outputs=[pdf_output])
251
- pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
 
 
 
252
 
253
- iface.launch(share=True, debug=True)
 
54
  <img src="{LOGO}" style="width: 100%; height: auto;"/>
55
  </div>
56
  """
57
+ # Check if CUDA is available
58
+ if not torch.cuda.is_available():
59
+ raise RuntimeError("CUDA not available. Go look for a GPU.")
60
+
61
+ # Set GPU
62
+ device = "cuda"
63
 
64
  def convert_to_wav(filepath):
65
  _,file_ending = os.path.splitext(f'{filepath}')
 
139
  similarity_matrix.add_edge(i, j, weight=len(common_words))
140
  return similarity_matrix
141
 
142
+ # PageRank
143
  def graph_based_summary(text, num_paragraphs=3):
144
+ """
145
+ 1. Constructs a token-based similarity matrix where sentences are nodes, with edge weights representing token overlap.
146
+ Applies PageRank to determine sentence importance, ranking nodes by their centrality in the graph.
147
+ Optimized for scenarios with dense inter-sentence relationships.
148
+ 2. Bjørn: gir sammendrag av det som er mest relevant i teksten.
149
+ """
150
  doc = nlp(text)
151
  sentences = [sent.text for sent in doc.sents]
152
  if len(sentences) < num_paragraphs:
153
+ return ' '.join(sentences) # Adjusted to return a single string
154
 
155
  sentence_tokens = [nlp(sent) for sent in sentences]
156
  stop_words = spacy_stop_words
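For readers skimming the diff, the ranking idea behind graph_based_summary can be reproduced outside the app. The sketch below is illustrative only: the helper name pagerank_summary and the en_core_web_sm model are assumptions, and the app itself uses its own nlp pipeline, spacy_stop_words, and build_similarity_matrix.

```python
# Illustrative sketch: token-overlap graph + PageRank, not the app's actual implementation.
import spacy
import networkx as nx

nlp = spacy.load("en_core_web_sm")  # assumption; the app configures its own pipeline

def pagerank_summary(text, num_sentences=3):
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_sentences:
        return " ".join(sentences)

    # One set of content tokens per sentence (stop words and punctuation removed)
    token_sets = [
        {tok.lemma_.lower() for tok in nlp(s) if not tok.is_stop and not tok.is_punct}
        for s in sentences
    ]

    # Sentences are nodes; edge weight = number of overlapping tokens
    graph = nx.Graph()
    graph.add_nodes_from(range(len(sentences)))
    for i in range(len(sentences)):
        for j in range(i + 1, len(sentences)):
            overlap = len(token_sets[i] & token_sets[j])
            if overlap:
                graph.add_edge(i, j, weight=overlap)

    # PageRank scores each sentence by its centrality; keep the top ones in document order
    scores = nx.pagerank(graph, weight="weight")
    top = sorted(sorted(scores, key=scores.get, reverse=True)[:num_sentences])
    return " ".join(sentences[i] for i in top)
```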
 
@@ -152,10 +163,16 @@ def graph_based_summary(text, num_paragraphs=3):
 
 # LexRank
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
+    """
+    1. Uses TF-IDF vectorization and cosine similarity to build a graph with sentences as nodes. PageRank scores
+       sentences by their eigenvector centrality, picking the most representative ones. Good for sparse graph
+       structures with thresholding.
+    2. Bjørn: gives a summary that best represents the whole text.
+    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
-        return sentences
+        return ' '.join(sentences)  # Adjusted to return a single string
 
     stop_words = spacy_stop_words
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
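The LexRank variant swaps token overlap for TF-IDF cosine similarity with a sparsifying threshold. A minimal sketch of that pipeline, again with assumed names (lexrank_sketch) and using scikit-learn and networkx directly rather than the app's own helpers:

```python
# Illustrative sketch: TF-IDF + thresholded cosine-similarity graph + PageRank (not the app's code).
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def lexrank_sketch(sentences, num_sentences=3, threshold=0.1):
    # One TF-IDF row per sentence, then pairwise cosine similarities
    tfidf = TfidfVectorizer().fit_transform(sentences)
    sim = cosine_similarity(tfidf)

    # Keep only edges above the threshold so the graph stays sparse
    graph = nx.Graph()
    graph.add_nodes_from(range(len(sentences)))
    for i in range(len(sentences)):
        for j in range(i + 1, len(sentences)):
            if sim[i, j] > threshold:
                graph.add_edge(i, j, weight=float(sim[i, j]))

    # PageRank on the similarity graph approximates eigenvector centrality
    scores = nx.pagerank(graph, weight="weight")
    top = sorted(sorted(scores, key=scores.get, reverse=True)[:num_sentences])
    return " ".join(sentences[i] for i in top)
```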
 
@@ -171,10 +188,18 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
 
 # TextRank
 def text_rank_summary(text, num_paragraphs=3):
+    """
+    1. Constructs a graph with sentences as nodes, connected by edges weighted by lexical similarity:
+       the number of words two sentences share, divided by their combined word count, gives the edge weight.
+
+
+       Suitable for general-purpose text summarization.
+    2. Bjørn: gives a summary that captures the most important parts of the text.
+    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
-        return sentences
+        return ' '.join(sentences)  # Adjusted to return a single string
 
     stop_words = spacy_stop_words
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
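The TextRank docstring describes a plain lexical-overlap weight: shared words divided by the combined word count of the two sentences. A small sketch of that similarity and the resulting ranking; the names lexical_similarity and textrank_sketch are illustrative, and the normalisation follows the docstring's description rather than the original TextRank paper:

```python
# Illustrative sketch of the lexical-overlap weighting described in the docstring above.
import networkx as nx

def lexical_similarity(words_a, words_b):
    # Shared words divided by the combined word count of the two sentences
    shared = len(set(words_a) & set(words_b))
    total = len(words_a) + len(words_b)
    return shared / total if total else 0.0

def textrank_sketch(sentences, num_sentences=3):
    words = [s.lower().split() for s in sentences]
    graph = nx.Graph()
    graph.add_nodes_from(range(len(sentences)))
    for i in range(len(sentences)):
        for j in range(i + 1, len(sentences)):
            weight = lexical_similarity(words[i], words[j])
            if weight > 0:
                graph.add_edge(i, j, weight=weight)
    scores = nx.pagerank(graph, weight="weight")
    top = sorted(sorted(scores, key=scores.get, reverse=True)[:num_sentences])
    return " ".join(sentences[i] for i in top)
```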
 
@@ -213,31 +238,60 @@ with iface:
     gr.Markdown(HEADER_INFO)
 
     with gr.Tabs():
-        with gr.TabItem("Transcription"):
-            audio_input = gr.Audio(type="filepath")
-            text_output = gr.Textbox(label="Text")
-            result_output = gr.Textbox(label="Transcription Details")
-            transcribe_button = gr.Button("Transcribe")
-
-            transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
 
-        with gr.TabItem("Summary | Graph-based"):
-            summary_output = gr.Textbox(label="Summary | Graph-based")
-            summarize_button = gr.Button("Summarize")
-
-            summarize_button.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output])
+        with gr.TabItem("Summary | PageRank"):
+            text_input_graph = gr.Textbox(label="Input Text", placeholder="txt2summarize")
+            summary_output_graph = gr.Textbox(label="PageRank | token-based similarity")
+
+            # Displaying AiLab and Bjørn explanations
+            gr.Markdown("""
+            **token-based**: similarity-graph edge weights represent token overlap;
+            sentences are ranked by their centrality in the graph (good with dense inter-sentence relationships)
+            """)
+            gr.Markdown("""
+            *Bjørn*: **gives a summary that captures the most relevant sentences in the text**
+            """)
+
+            summarize_transcribed_button_graph = gr.Button("Summary of Transcribed Text, Click Here")
+            summarize_transcribed_button_graph.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output_graph])
+            summarize_uploaded_button_graph = gr.Button("Upload Text to Summarize, Click Here")
+            summarize_uploaded_button_graph.click(fn=graph_based_summary, inputs=[text_input_graph], outputs=[summary_output_graph])
 
         with gr.TabItem("Summary | LexRank"):
-            summary_output = gr.Textbox(label="Summary | LexRank")
-            summarize_button = gr.Button("Summarize")
-
-            summarize_button.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output])
+            text_input_lex = gr.Textbox(label="Input Text", placeholder="txt2summarize")
+            summary_output_lex = gr.Textbox(label="LexRank | cosine similarity")
+
+            # Displaying AiLab and Bjørn explanations
+            gr.Markdown("""
+            **semantic**: TF-IDF vectorization with a cosine-similarity matrix, ranked by eigenvector centrality
+            (good for sparse graph structures with thresholding)
+            """)
+            gr.Markdown("""
+            *Bjørn*: **gives a summary that best captures the meaning of the whole text**
+            """)
+
+            summarize_transcribed_button_lex = gr.Button("Summary of Transcribed Text, Click Here")
+            summarize_transcribed_button_lex.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output_lex])
+            summarize_uploaded_button_lex = gr.Button("Upload Text to Summarize, Click Here")
+            summarize_uploaded_button_lex.click(fn=lex_rank_summary, inputs=[text_input_lex], outputs=[summary_output_lex])
 
         with gr.TabItem("Summary | TextRank"):
-            summary_output = gr.Textbox(label="Summary | TextRank")
-            summarize_button = gr.Button("Summarize")
-
-            summarize_button.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output])
+            text_input_text_rank = gr.Textbox(label="Input Text", placeholder="txt2summarize")
+            summary_output_text_rank = gr.Textbox(label="TextRank | lexical similarity")
+
+            # Displaying AiLab and Bjørn explanations
+            gr.Markdown("""
+            **sentence**: graph with weighted edges based on lexical similarity (i.e. word overlap between sentences)
+            """)
+            gr.Markdown("""
+            *Bjørn*: **a summary based on the sentences in the text that most resemble each other**
+            """)
+
+            summarize_transcribed_button_text_rank = gr.Button("Summary of Transcribed Text, Click Here")
+            summarize_transcribed_button_text_rank.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output_text_rank])
+            summarize_uploaded_button_text_rank = gr.Button("Upload Text to Summarize, Click Here")
+            summarize_uploaded_button_text_rank.click(fn=text_rank_summary, inputs=[text_input_text_rank], outputs=[summary_output_text_rank])
 
         with gr.TabItem("Download PDF"):
             pdf_text_only = gr.Button("Download PDF with Text Only")
 
@@ -247,7 +301,10 @@ with iface:
             pdf_output = gr.File(label="Download PDF")
 
             pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
-            pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output], outputs=[pdf_output])
-            pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
+            pdf_summary_only.click(fn=lambda graph_sum, lex_sum, text_sum: save_to_pdf("", graph_sum or lex_sum or text_sum), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output])  # Includes all summary outputs
+            pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output])  # Defaulting to the graph-based (PageRank) summary
+
+
+
 
-iface.launch(share=True, debug=True)
+iface.launch(share=True, debug=True)
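The PDF tab calls a save_to_pdf(text, summary) helper that is defined elsewhere in app.py and is not shown in this diff. Purely as a hypothetical sketch of what such a helper might look like, assuming the fpdf2 package and a file path returned for the gr.File output; the real implementation may differ:

```python
# Hypothetical sketch only; the actual save_to_pdf in app.py is not part of this diff.
from fpdf import FPDF  # assumes the fpdf2 package

def save_to_pdf(text, summary):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Helvetica", size=12)
    if text:
        pdf.multi_cell(0, 10, "Transcribed text:\n" + text)
        pdf.ln(5)
    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + summary)
    output_path = "transcription.pdf"
    pdf.output(output_path)
    return output_path  # gr.File components can serve a returned file path
```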