camparchimedes committed on
Commit
3a22e5c
·
verified ·
1 Parent(s): b6f831c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -23
app.py CHANGED
@@ -62,7 +62,7 @@ if not torch.cuda.is_available():
62
  device = "cuda"
63
 
64
  def convert_to_wav(filepath):
65
- _,file_ending = os.path.splitext(f'{filepath}')
66
  audio_file = filepath.replace(file_ending, ".wav")
67
  os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
68
  return audio_file
@@ -141,12 +141,7 @@ def build_similarity_matrix(sentences, stop_words):
141
 
142
  # PageRank
143
  def graph_based_summary(text, num_paragraphs=3):
144
- """
145
- 1. Constructs a token-based similarity matrix where sentences are nodes, with edge weights representing token overlap.
146
- Applies PageRank to determine sentence importance, ranking nodes by their centrality in the graph.
147
- Optimized for scenarios with dense inter-sentence relationships.
148
- 2. Bjørn: gir sammendrag av det som er mest relevant i teksten.
149
- """
150
  doc = nlp(text)
151
  sentences = [sent.text for sent in doc.sents]
152
  if len(sentences) < num_paragraphs:
@@ -163,12 +158,7 @@ def graph_based_summary(text, num_paragraphs=3):
163
 
164
  # LexRank
165
  def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
166
- """
167
- 1. Uses TF-IDF vectorization to compute cosine similarity and builds a graph with sentences as nodes. PageRank ranks sentences
168
- by their eigenvector centrality, identifying the most representative ones. Good for sparse graph structures with thresholding.
169
- 2. Bjørn: gir et sammendrag som best representerer hele teksten.
170
 
171
- """
172
  doc = nlp(text)
173
  sentences = [sent.text for sent in doc.sents]
174
  if len(sentences) < num_paragraphs:
@@ -188,14 +178,7 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
188
 
189
  # TextRank
190
  def text_rank_summary(text, num_paragraphs=3):
191
- """
192
- 1. Constructs a graph with sentences as nodes (does not use TF-IDF), connected by weighted edges based on lexical similarity
193
- (counts the number of shared words between two sentences and divides by the total number of words to get a similarity score).
194
-
195
-
196
- Suitable for general-purpose text summarization.
197
- 2. Bjørn: gir et sammendrag som fanger opp de viktigste delene av teksten.
198
- """
199
  doc = nlp(text)
200
  sentences = [sent.text for sent in doc.sents]
201
  if len(sentences) < num_paragraphs:
@@ -304,7 +287,4 @@ with iface:
304
  pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output]) # Includes all summary outputs
305
  pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output]) # Defaulting to Graph-based summary
306
 
307
-
308
-
309
-
310
  iface.launch(share=True, debug=True)
 
62
  device = "cuda"
63
 
64
  def convert_to_wav(filepath):
65
+ _, file_ending = os.path.splitext(f'{filepath}')
66
  audio_file = filepath.replace(file_ending, ".wav")
67
  os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
68
  return audio_file
 
141
 
142
  # PageRank
143
  def graph_based_summary(text, num_paragraphs=3):
144
+
 
 
 
 
 
145
  doc = nlp(text)
146
  sentences = [sent.text for sent in doc.sents]
147
  if len(sentences) < num_paragraphs:
 
158
 
159
  # LexRank
160
  def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
 
 
 
 
161
 
 
162
  doc = nlp(text)
163
  sentences = [sent.text for sent in doc.sents]
164
  if len(sentences) < num_paragraphs:
 
178
 
179
  # TextRank
180
  def text_rank_summary(text, num_paragraphs=3):
181
+
 
 
 
 
 
 
 
182
  doc = nlp(text)
183
  sentences = [sent.text for sent in doc.sents]
184
  if len(sentences) < num_paragraphs:
 
287
  pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output]) # Includes all summary outputs
288
  pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output]) # Defaulting to Graph-based summary
289
 
 
 
 
290
  iface.launch(share=True, debug=True)