Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -62,7 +62,7 @@ if not torch.cuda.is_available():
|
|
62 |
device = "cuda"
|
63 |
|
64 |
def convert_to_wav(filepath):
|
65 |
-
_,file_ending = os.path.splitext(f'{filepath}')
|
66 |
audio_file = filepath.replace(file_ending, ".wav")
|
67 |
os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
|
68 |
return audio_file
|
@@ -141,12 +141,7 @@ def build_similarity_matrix(sentences, stop_words):
|
|
141 |
|
142 |
# PageRank
|
143 |
def graph_based_summary(text, num_paragraphs=3):
|
144 |
-
|
145 |
-
1. Constructs a token-based similarity matrix where sentences are nodes, with edge weights representing token overlap.
|
146 |
-
Applies PageRank to determine sentence importance, ranking nodes by their centrality in the graph.
|
147 |
-
Optimized for scenarios with dense inter-sentence relationships.
|
148 |
-
2. Bjørn: gir sammendrag av det som er mest relevant i teksten.
|
149 |
-
"""
|
150 |
doc = nlp(text)
|
151 |
sentences = [sent.text for sent in doc.sents]
|
152 |
if len(sentences) < num_paragraphs:
|
@@ -163,12 +158,7 @@ def graph_based_summary(text, num_paragraphs=3):
|
|
163 |
|
164 |
# LexRank
|
165 |
def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
|
166 |
-
"""
|
167 |
-
1. Uses TF-IDF vectorization to compute cosine similarity, graph with sentences as nodes. PageRank estimate sentences
|
168 |
-
by their eigenvector centrality, identifying the most representative ones. Good for sparse graph structures with thresholding.
|
169 |
-
2. Bjørn: gir et sammendrag som best representerer hele teksten.
|
170 |
|
171 |
-
"""
|
172 |
doc = nlp(text)
|
173 |
sentences = [sent.text for sent in doc.sents]
|
174 |
if len(sentences) < num_paragraphs:
|
@@ -188,14 +178,7 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
|
|
188 |
|
189 |
# TextRank
|
190 |
def text_rank_summary(text, num_paragraphs=3):
|
191 |
-
|
192 |
-
1. Constructs a graph with sentences as nodes (does not use TF-IDF), connected by weighted edges based on lexical similarity
|
193 |
-
(counts the number of shared words between two sentences and dividing by the total number of words to get a similarity score),
|
194 |
-
|
195 |
-
|
196 |
-
Suitable for general-purpose text summarization.
|
197 |
-
2. Bjørn: gir et sammendrag som fanger opp de viktigste delene av teksten.
|
198 |
-
"""
|
199 |
doc = nlp(text)
|
200 |
sentences = [sent.text for sent in doc.sents]
|
201 |
if len(sentences) < num_paragraphs:
|
@@ -304,7 +287,4 @@ with iface:
|
|
304 |
pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output]) # Includes all summary outputs
|
305 |
pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output]) # Defaulting to Graph-based summary
|
306 |
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
iface.launch(share=True, debug=True)
|
|
|
62 |
device = "cuda"
|
63 |
|
64 |
def convert_to_wav(filepath):
    """Convert an audio file to a 16 kHz, mono, 16-bit PCM WAV file via ffmpeg.

    The output is written next to the input, using the same file name with
    its final extension swapped for ``.wav``.

    Args:
        filepath: Path of the audio file to convert (``str`` or path-like).

    Returns:
        The path of the ``.wav`` file as a string. The path is returned even
        when ffmpeg fails or is unavailable, so the conversion stays
        best-effort as before.
    """
    # Function-scope imports: the file's import block is not visible from here.
    import subprocess
    import sys

    source = str(filepath)

    # Swap only the trailing extension. A plain str.replace() on the extension
    # would also rewrite matching text earlier in the path (e.g. a directory
    # called "a.mp3"), and with an empty extension it would insert ".wav"
    # between every character of the path.
    stem, _ = os.path.splitext(source)
    audio_file = stem + ".wav"

    # Input and output would be the same file; ffmpeg does not convert a file
    # onto itself, so hand the path back untouched.
    if audio_file == source:
        return audio_file

    # Argument list instead of a shell string: quotes, "$" or backticks in the
    # file name can no longer be interpreted by a shell.
    command = [
        "ffmpeg",
        "-i", source,
        "-ar", "16000",
        "-ac", "1",
        "-c:a", "pcm_s16le",
        audio_file,
    ]
    try:
        # stdin is detached so ffmpeg can never block on an interactive prompt.
        subprocess.run(command, stdin=subprocess.DEVNULL, check=False)
    except OSError as exc:
        # ffmpeg missing or not executable: report it, but keep returning the
        # target path like the original os.system() call did.
        print(f"convert_to_wav: could not run ffmpeg: {exc}", file=sys.stderr)
    return audio_file
|
|
|
141 |
|
142 |
# PageRank
|
143 |
def graph_based_summary(text, num_paragraphs=3):
|
144 |
+
|
|
|
|
|
|
|
|
|
|
|
145 |
doc = nlp(text)
|
146 |
sentences = [sent.text for sent in doc.sents]
|
147 |
if len(sentences) < num_paragraphs:
|
|
|
158 |
|
159 |
# LexRank
|
160 |
def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
|
|
|
|
|
|
|
|
|
161 |
|
|
|
162 |
doc = nlp(text)
|
163 |
sentences = [sent.text for sent in doc.sents]
|
164 |
if len(sentences) < num_paragraphs:
|
|
|
178 |
|
179 |
# TextRank
|
180 |
def text_rank_summary(text, num_paragraphs=3):
|
181 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
doc = nlp(text)
|
183 |
sentences = [sent.text for sent in doc.sents]
|
184 |
if len(sentences) < num_paragraphs:
|
|
|
287 |
# Gradio calls `fn` with one positional argument per component listed in
# `inputs`. Three summary components are wired in, so the handler must accept
# three values; a single-parameter lambda raises TypeError on click.
# NOTE(review): the summaries are joined into one string on the assumption that
# save_to_pdf takes the summary as a single str — confirm against save_to_pdf.
pdf_summary_only.click(
    fn=lambda *summaries: save_to_pdf("", "\n\n".join(s for s in summaries if s)),
    inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank],
    outputs=[pdf_output],
)  # Includes all summary outputs
|
288 |
pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output]) # Defaulting to Graph-based summary
|
289 |
|
|
|
|
|
|
|
290 |
iface.launch(share=True, debug=True)
|