Spaces:

camparchimedes
/

nb

Build error

App Files Files

camparchimedes committed on Aug 26, 2024

Commit

3a22e5c

verified ·

1 Parent(s): b6f831c

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -23

app.py CHANGED Viewed

@@ -62,7 +62,7 @@ if not torch.cuda.is_available():
 device = "cuda"
 def convert_to_wav(filepath):
-    _,file_ending = os.path.splitext(f'{filepath}')
     audio_file = filepath.replace(file_ending, ".wav")
     os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
     return audio_file
@@ -141,12 +141,7 @@ def build_similarity_matrix(sentences, stop_words):
 # PageRank
 def graph_based_summary(text, num_paragraphs=3):
-     """
-    1. Constructs a token-based similarity matrix where sentences are nodes, with edge weights representing token overlap.
-       Applies PageRank to determine sentence importance, ranking nodes by their centrality in the graph.
-       Optimized for scenarios with dense inter-sentence relationships.
-    2. Bjørn: gir sammendrag av det som er mest relevant i teksten.
-    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
@@ -163,12 +158,7 @@ def graph_based_summary(text, num_paragraphs=3):
 # LexRank
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
-     """
-    1. Uses TF-IDF vectorization to compute cosine similarity, graph with sentences as nodes. PageRank estimate sentences
-       by their eigenvector centrality, identifying the most representative ones. Good for sparse graph structures with thresholding.
-    2. Bjørn: gir et sammendrag som best representerer hele teksten.
-    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
@@ -188,14 +178,7 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
 # TextRank
 def text_rank_summary(text, num_paragraphs=3):
-    """
-    1. Constructs a graph with sentences as nodes (does not use TF-IDF), connected by weighted edges based on lexical similarity
-       (counts the number of shared words between two sentences and dividing by the total number of words to get a similarity score),
-       Suitable for general-purpose text summarization.
-    2. Bjørn: gir et sammendrag som fanger opp de viktigste delene av teksten.
-    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
@@ -304,7 +287,4 @@ with iface:
             pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output])  # Includes all summary outputs
             pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output])  # Defaulting to Graph-based summary
 iface.launch(share=True, debug=True)

 device = "cuda"
 def convert_to_wav(filepath):
     """Transcode an audio file to 16 kHz, mono, 16-bit PCM WAV with ffmpeg.

     The output path is the input path with its final extension swapped for
     ``.wav`` (or with ``.wav`` appended when the input has no extension).

     Args:
         filepath: Path of the source audio file (str or path-like).

     Returns:
         The path of the WAV file ffmpeg was asked to write. As before, the
         path is returned whether or not ffmpeg succeeded; callers that need
         certainty should check that the file exists.
     """
     # Local imports keep this block self-contained; `os` is already used by
     # the original code and is expected at module level.
     import subprocess
     import sys

     source = str(filepath)

     # Build the target from the stem instead of str.replace(): replace()
     # rewrote *every* occurrence of the extension text (including inside
     # directory names) and, for extension-less paths, replace("", ".wav")
     # injected ".wav" between every character.
     stem, _ = os.path.splitext(source)
     audio_file = stem + ".wav"

     # Argument list + no shell: file names containing quotes, spaces or
     # shell metacharacters can no longer break or inject into the command.
     command = [
         "ffmpeg",
         "-i", source,
         "-ar", "16000",       # resample to 16 kHz
         "-ac", "1",           # downmix to mono
         "-c:a", "pcm_s16le",  # 16-bit little-endian PCM
         audio_file,
     ]
     try:
         # check=False mirrors the old os.system() call, which ignored the
         # exit status of ffmpeg.
         subprocess.run(command, check=False)
     except OSError as exc:
         # ffmpeg missing / not executable: report it, keep the old
         # best-effort contract of still returning the target path.
         print(f"convert_to_wav: could not run ffmpeg: {exc}", file=sys.stderr)

     # NOTE(review): when the input already ends in ".wav" the target equals
     # the source; ffmpeg is expected to refuse in-place output - confirm
     # whether such inputs should be special-cased by the caller.
     return audio_file
 # PageRank
 def graph_based_summary(text, num_paragraphs=3):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
 # LexRank
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
 # TextRank
 def text_rank_summary(text, num_paragraphs=3):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
             pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output])  # Includes all summary outputs
             pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output])  # Defaulting to Graph-based summary
 iface.launch(share=True, debug=True)