Spaces:

camparchimedes
/

nb

Build error

App Files Files

camparchimedes committed on Aug 26, 2024

Commit

b6f831c

verified ·

1 Parent(s): 1376856

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -28

app.py CHANGED Viewed

@@ -54,7 +54,12 @@ SIDEBAR_INFO = f"""
     <img src="{LOGO}" style="width: 100%; height: auto;"/>
 </div>
 """
-device = "cuda" if torch.cuda.is_available() else "cpu"
 def convert_to_wav(filepath):
     _,file_ending = os.path.splitext(f'{filepath}')
@@ -134,12 +139,18 @@ def build_similarity_matrix(sentences, stop_words):
                 similarity_matrix.add_edge(i, j, weight=len(common_words))
     return similarity_matrix
-# "Graph-based summarization" =====>
 def graph_based_summary(text, num_paragraphs=3):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
-        return sentences
     sentence_tokens = [nlp(sent) for sent in sentences]
     stop_words = spacy_stop_words
@@ -152,10 +163,16 @@ def graph_based_summary(text, num_paragraphs=3):
 # LexRank
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
-        return sentences
     stop_words = spacy_stop_words
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
@@ -171,10 +188,18 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
 # TextRank
 def text_rank_summary(text, num_paragraphs=3):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
-        return sentences
     stop_words = spacy_stop_words
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
@@ -213,31 +238,60 @@ with iface:
     gr.Markdown(HEADER_INFO)
     with gr.Tabs():
-        with gr.TabItem("Transcription"):
-            audio_input = gr.Audio(type="filepath")
-            text_output = gr.Textbox(label="Text")
-            result_output = gr.Textbox(label="Transcription Details")
-            transcribe_button = gr.Button("Transcribe")
-            transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
-        with gr.TabItem("Summary | Graph-based"):
-            summary_output = gr.Textbox(label="Summary | Graph-based")
-            summarize_button = gr.Button("Summarize")
-            summarize_button.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output])
         with gr.TabItem("Summary | LexRank"):
-            summary_output = gr.Textbox(label="Summary | LexRank")
-            summarize_button = gr.Button("Summarize")
-            summarize_button.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output])
         with gr.TabItem("Summary | TextRank"):
-            summary_output = gr.Textbox(label="Summary | TextRank")
-            summarize_button = gr.Button("Summarize")
-            summarize_button.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output])
         with gr.TabItem("Download PDF"):
             pdf_text_only = gr.Button("Download PDF with Text Only")
@@ -247,7 +301,10 @@ with iface:
             pdf_output = gr.File(label="Download PDF")
             pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
-            pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output], outputs=[pdf_output])
-            pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
-iface.launch(share=True, debug=True)

     <img src="{LOGO}" style="width: 100%; height: auto;"/>
 </div>
 """
+# Check if CUDA is available
+if not torch.cuda.is_available():
+    raise RuntimeError("CUDA not available. Go look for a GPU.")
+# Set GPU
+device = "cuda"
 def convert_to_wav(filepath):
     _,file_ending = os.path.splitext(f'{filepath}')
                 similarity_matrix.add_edge(i, j, weight=len(common_words))
     return similarity_matrix
+# PageRank
 def graph_based_summary(text, num_paragraphs=3):
+    """
+    1. Constructs a token-based similarity matrix in which sentences are nodes and edge weights represent token overlap.
+       Applies PageRank to determine sentence importance, ranking nodes by their centrality in the graph.
+       Optimized for scenarios with dense inter-sentence relationships.
+    2. Bjørn: gives a summary of what is most relevant in the text.
+    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
+        return ' '.join(sentences)  # Adjusted to return a single string
     sentence_tokens = [nlp(sent) for sent in sentences]
     stop_words = spacy_stop_words
 # LexRank
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
+    """
+    1. Uses TF-IDF vectorization to compute cosine similarity and builds a graph with sentences as nodes. PageRank ranks
+       sentences by their eigenvector centrality, identifying the most representative ones. Good for sparse graph structures with thresholding.
+    2. Bjørn: gives a summary that best represents the whole text.
+    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
+        return ' '.join(sentences)  # Adjusted to return a single string
     stop_words = spacy_stop_words
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
 # TextRank
 def text_rank_summary(text, num_paragraphs=3):
+    """
+    1. Constructs a graph with sentences as nodes (does not use TF-IDF), connected by weighted edges based on lexical similarity
+       (the number of words shared by two sentences divided by the total number of words gives the similarity score).
+       Suitable for general-purpose text summarization.
+       NOTE(review): the body below instantiates TfidfVectorizer, which contradicts "does not use TF-IDF" -- confirm which is intended.
+    2. Bjørn: gives a summary that captures the most important parts of the text.
+    """
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
     if len(sentences) < num_paragraphs:
+        return ' '.join(sentences)  # Adjusted to return a single string
     stop_words = spacy_stop_words
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
     gr.Markdown(HEADER_INFO)
     with gr.Tabs():
+        with gr.TabItem("Summary | PageRank"):
+            text_input_graph = gr.Textbox(label="Input Text", placeholder="txt2summarize")
+            summary_output_graph = gr.Textbox(label="PageRank | token-based similarity")
+            # Displaying AiLab and Bjørn explanations
+            gr.Markdown("""
+            **token-based**: similarity matrix edge weights representing token overlap/
+            ranked by their centrality in the graph (good with dense inter-sentence relationships)
+            """)
+            gr.Markdown("""
+            *Bjørn*: **gir sammendrag som fanger opp de mest relevante setninger i teksten**
+            """)
+            summarize_transcribed_button_graph = gr.Button("Summary of Transcribed Text, Click Here")
+            summarize_transcribed_button_graph.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output_graph])
+            summarize_uploaded_button_graph = gr.Button("Upload Text to Summarize, Click Here")
+            summarize_uploaded_button_graph.click(fn=graph_based_summary, inputs=[text_input_graph], outputs=[summary_output_graph])
         with gr.TabItem("Summary | LexRank"):
+            text_input_lex = gr.Textbox(label="Input Text", placeholder="txt2summarize")
+            summary_output_lex = gr.Textbox(label="LexRank | cosine similarity")
+            # Displaying AiLab and Bjørn explanations
+            gr.Markdown("""
+            **semantic**: TF-IDF vectorization@cosine similarity matrix, ranked by eigenvector centrality.
+            (good for sparse graph structures with thresholding)
+            """)
+            gr.Markdown("""
+            *Bjørn*: **gir sammendrag som best fanger opp betydningen av hele teksten**
+            """)
+            summarize_transcribed_button_lex = gr.Button("Summary of Transcribed Text, Click Here")
+            summarize_transcribed_button_lex.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output_lex])
+            summarize_uploaded_button_lex = gr.Button("Upload Text to Summarize, Click Here")
+            summarize_uploaded_button_lex.click(fn=lex_rank_summary, inputs=[text_input_lex], outputs=[summary_output_lex])
         with gr.TabItem("Summary | TextRank"):
+            text_input_text_rank = gr.Textbox(label="Input Text", placeholder="txt2summarize")
+            summary_output_text_rank = gr.Textbox(label="TextRank | lexical similarity")
+            # Displaying AiLab and Bjørn explanations
+            gr.Markdown("""
+            **sentence**: graph with weighted edges based on lexical similarity. (i.e" "sentence similarity"word overlap)/sentence similarity
+            """)
+            gr.Markdown("""
+            *Bjørn*: **sammendrag basert på i de setningene som ligner mest på hverandre fra teksten**
+            """)
+            summarize_transcribed_button_text_rank = gr.Button("Summary of Transcribed Text, Click Here")
+            summarize_transcribed_button_text_rank.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output_text_rank])
+            summarize_uploaded_button_text_rank = gr.Button("Upload Text to Summarize, Click Here")
+            summarize_uploaded_button_text_rank.click(fn=text_rank_summary, inputs=[text_input_text_rank], outputs=[summary_output_text_rank])
         with gr.TabItem("Download PDF"):
             pdf_text_only = gr.Button("Download PDF with Text Only")
             pdf_output = gr.File(label="Download PDF")
             pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
+            pdf_summary_only.click(fn=lambda *summaries: save_to_pdf("", "\n\n".join(s for s in summaries if s)), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output])  # Gradio passes one positional argument per input component, so accept all three summaries and join the non-empty ones
+            pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output_graph], outputs=[pdf_output])  # Defaulting to Graph-based summary
+iface.launch(share=True, debug=True)