Spaces:

camparchimedes
/

nb

Build error

App Files Files

camparchimedes committed on Aug 19, 2024

Commit

8c6ad91

verified ·

1 Parent(s): 38c9a82

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -268

app.py CHANGED Viewed

@@ -1,138 +1,80 @@
 import time
 import os
-import spaces
-import contextlib
 import warnings
-warnings.filterwarnings("ignore")
-from pydub import AudioSegment
-# If m4a audio, convert to wav (Python)
 def convert_to_wav(audio_file):
     audio = AudioSegment.from_file(audio_file, format="m4a")
     wav_file = "temp.wav"
     audio.export(wav_file, format="wav")
     return wav_file
-# path/filename: /mnt/data/switchdev.py
-import torch
-from transformers import pipeline, WhisperForConditionalGeneration
-def get_device():
-    # This function defers the device check until it's actually needed
-    return "cuda" if torch.cuda.is_available() else "cpu"
-# prepare decoder input IDs for generation
-def prepare_decoder_input_ids_for_generation_patch(self, batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, device):
-    if 'decoder_input_ids' not in model_kwargs:
-        return torch.ones((batch_size, 1), dtype=torch.long) * decoder_start_token_id, model_kwargs
-    else:
-        return model_kwargs.pop('decoder_input_ids'), model_kwargs
-# Patch the WhisperForConditionalGeneration class
-WhisperForConditionalGeneration._prepare_decoder_input_ids_for_generation = prepare_decoder_input_ids_for_generation_patch
-def create_pipeline():
-    # Only initialize the device when the pipeline is created
-    device = get_device()
-    try:
-    pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-except RuntimeError as e:
-    if "CUDA error" in str(e):
-        print("CUDA initialization failed. Falling back to CPU.")
-        pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device="cpu")
-    else:
-        raise e
-    # pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-    return pipe
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
-    pipe = create_pipeline()  # Initialize the pipeline here
     start_time = time.time()
-    # transcribe
-    output = pipe(audio_file)
-    # get text
     text = output["text"]
-    end_time = time.time()
-    output_time = end_time - start_time
-    word_count = len(text.split())
-    # summary
-    result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
     return text, result
-import nltk
-from nltk.tokenize import word_tokenize, sent_tokenize
-from nltk.corpus import stopwords
-import networkx as nx
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-import pandas as pd
-import numpy as np
-import re
-nltk.download('punkt')
-nltk.download('stopwords')
-WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
 def clean_text(text):
-    text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
-    text = re.sub(r'\<a href', ' ', str(text))
-    text = re.sub(r'&amp;', '', str(text))
-    text = re.sub(r'\(s+', '(', str(text))
-    text = re.sub(r's+\)', ')', str(text))
-    text = re.sub(r'\(\)', '', str(text))
-    text = re.sub(r'\s+', ' ', str(text))
-    text = re.sub(r'[_"\-;%|+&=*%!?:#$@\[\]]', ' ', str(text))
-    text = re.sub(r'<br />', ' ', str(text))
-    text = re.sub(r'\'', '', str(text))
-    text = re.sub(r'«', '', str(text))
-    text = re.sub(r'»', '', str(text))
-    text = re.sub(r'–', '-', str(text))
-    text = re.sub(r'…', '.', str(text))
-    text = re.sub(r'[^\x00-\x7F]+', ' ', str(text))
     return text
 def preprocess_text(text):
-    try:
-        words = word_tokenize(text)
-        stop_words = set(stopwords.words('norwegian'))
-        words_without_stopwords = [word for word in words if word.lower() not in stop_words]
-        processed_text = ' '.join(words_without_stopwords)
-        return processed_text
-    except Exception as e:
-        st.error(f"Error during text preprocessing: {e}")
-        return None
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-device = "cuda" if torch.cuda.is_available() else "cpu"
-summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16)
-summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
-summarization_model.to(device)
 def summarize_text(text):
     preprocessed_text = preprocess_text(text)
-    if preprocessed_text is None:
-        return None
-    inputs = summarization_tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)
     inputs = inputs.to(device)
     summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
-    summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-    return summary
 def build_similarity_matrix(sentences, stop_words):
     similarity_matrix = nx.Graph()
     for i, tokens_a in enumerate(sentences):
@@ -142,10 +84,12 @@ def build_similarity_matrix(sentences, stop_words):
                 similarity_matrix.add_edge(i, j, weight=len(common_words))
     return similarity_matrix
 def graph_based_summary(text, num_paragraphs=3):
-    sentences = text.strip().split(".")
     if len(sentences) < num_paragraphs:
         return sentences
     sentence_tokens = [word_tokenize(sent) for sent in sentences]
     stop_words = set(stopwords.words('norwegian'))
     filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
@@ -153,53 +97,27 @@ def graph_based_summary(text, num_paragraphs=3):
     scores = nx.pagerank(similarity_matrix)
     ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
-    summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
-    return summary
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
     sentences = nltk.sent_tokenize(text)
     if len(sentences) < num_paragraphs:
         return sentences
     stop_words = set(stopwords.words('norwegian'))
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
     X = vectorizer.fit_transform(sentences)
     similarity_matrix = cosine_similarity(X, X)
-# pipe it
-pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-def transcribe_audio(audio_file):
-    if audio_file.endswith(".m4a"):
-        audio_file = convert_to_wav(audio_file)
-    start_time = time.time()
-    # transcribe
-    output = pipe(audio_file)
-    # get text
-    text = output["text"]
-    end_time = time.time()
-    output_time = end_time - start_time
-    word_count = len(text.split())
-    # summary
-    result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
-    return text, result
-    for i in range(len(similarity_matrix)): # threshold
-        for j in range(len(similarity_matrix[i])):
-            if similarity_matrix[i][j] < threshold:
-                similarity_matrix[i][j] = 0.0
     nx_graph = nx.from_numpy_array(similarity_matrix)
     scores = nx.pagerank(nx_graph)
     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
-    summary = [ranked_sentences[i][1] for i in range(num_paragraphs)]
-    return summary
 def text_rank_summary(text, num_paragraphs=3):
     sentences = nltk.sent_tokenize(text)
     if len(sentences) < num_paragraphs:
@@ -210,57 +128,21 @@ def text_rank_summary(text, num_paragraphs=3):
     X = vectorizer.fit_transform(sentences)
     similarity_matrix = cosine_similarity(X, X)
-    nx_graph = nx.from_numpy_array(similarity_matrix)  # graph, nodes (i.e sentences) & edges are similarity scores (is cool)
-    scores = nx.pagerank(nx_graph) # PageRank algorithm, scoring sentences
-    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
-    summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
-    return ' '.join(summary)
-banner_html = """
-<div style="text-align: center;">
-    <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/lol.webp" alt="" width="100%" height="auto">
-</div>
-"""
-# https://raw.huggingface.co/spaces/camparchimedes/transcription_app/blob/main/banner_trans.png
-import gradio as gr
-from fpdf import FPDF
-from PIL import Image
 def save_to_pdf(text, summary):
     pdf = FPDF()
-    pdf.add_pag# pipe it
-pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-def transcribe_audio(audio_file):
-    if audio_file.endswith(".m4a"):
-        audio_file = convert_to_wav(audio_file)
-    start_time = time.time()
-    # transcribe
-    output = pipe(audio_file)
-    # get text
-    text = output["text"]
-    end_time = time.time()
-    output_time = end_time - start_time
-    word_count = len(text.split())
-    # summary
-    result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
-    return text, resulte()
     pdf.set_font("Arial", size=12)
     if text:
-        pdf.multi_cell(0, 10, "text:\n" + text)
-    # paragraph space
-    pdf.ln(10)
     if summary:
         pdf.multi_cell(0, 10, "Summary:\n" + summary)
@@ -269,122 +151,53 @@ def transcribe_audio(audio_file):
     pdf.output(pdf_output_path)
     return pdf_output_path
-iface = gr.Interface(
-    fn=transcribe_audio,
-    inputs=gr.Audio(type="filepath"),
-    outputs=gr.Textbox(label="Transcription"),
-    title="SW Transcription App",
-    description="Upload an audio file to get the text",
-    theme="default",
-    live=False
-)
 iface = gr.Blocks()
 with iface:
-    gr.HTML(banner_html)
     gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
     with gr.Tabs():
         with gr.TabItem("Transcription"):
             audio_input = gr.Audio(type="filepath")
-            text_output = gr.Textbox(label="text")
             result_output = gr.Textbox(label="Time taken and Number of words")
             transcribe_button = gr.Button("Transcribe")
-            transcribe_button.click(
-                fn=transcribe_audio,
-                inputs=[audio_input],
-                outputs=[text_output, result_output]
-            )
-        with gr.TabItem("Summary_t1"):
             summary_output = gr.Textbox(label="Summary | Graph-based")
             summarize_button = gr.Button("Summarize")
-            def summarize(text):
-                if not text:
-                    return "Warning: a text must be available."
-                summary = graph_based_summary(text)
-                return summary
-            summarize_button.click(
-                fn=summarize,
-                inputs=[text_output],
-                outputs=summary_output
-            )
-        with gr.TabItem("LexRank"):
             summary_output = gr.Textbox(label="Summary | LexRank")
             summarize_button = gr.Button("Summarize")
-            def summarize(text):
-                if not text:
-                    return "Warning: a text must be available."
-                summary = lex_rank_summary(text)
-                return summary
-            summarize_button.click(
-                fn=summarize,
-                inputs=[text_output],
-                outputs=summary_output
-            )
-        with gr.TabItem("TextRank"):
             summary_output = gr.Textbox(label="Summary | TextRank")
             summarize_button = gr.Button("Summarize")
-            def summarize(text):
-                if not text:
-                    return "Warning: a text must be available."
-                summary = text_rank_summary(text)
-                return summary
-            summarize_button.click(
-                fn=summarize,
-                inputs=[text_output],
-                outputs=summary_output
-            )
         with gr.TabItem("Download PDF"):
-            pdf_text_only = gr.Button("Download PDF with text Only")
             pdf_summary_only = gr.Button("Download PDF with Summary Only")
             pdf_both = gr.Button("Download PDF with Both")
-            pdf_output_text_only = gr.File(label="Download PDF")
-            pdf_output_summary_only = gr.File(label="Download PDF")
-            pdf_output_both = gr.File(label="Download PDF")
-            def generate_pdf_text_only(text):
-                return save_to_pdf(text, "")
-            def generate_pdf_summary_only(summary):
-                return save_to_pdf("", summary)
-            def generate_pdf_both(text, summary):
-                return save_to_pdf(text, summary)
-            pdf_text_only.click(
-                fn=generate_pdf_text_only,
-                inputs=[text_output],
-                outputs=[pdf_output_text_only]
-            )
-            pdf_summary_only.click(
-                fn=generate_pdf_summary_only,
-                inputs=[summary_output],
-                outputs=[pdf_output_summary_only]
-            )
-            pdf_both.click(
-                fn=generate_pdf_both,
-                inputs=[text_output, summary_output],
-                outputs=[pdf_output_both]
-            )
 iface.launch(share=True, debug=True)

 import time
 import os
 import warnings
+from pydub import AudioSegment
+import torch
+from transformers import pipeline, WhisperForConditionalGeneration
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+import networkx as nx
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import pandas as pd
+import numpy as np
+import re
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import gradio as gr
+from fpdf import FPDF
+from PIL import Image
+# Suppress warnings
+warnings.filterwarnings("ignore")
+# NLTK dependencies
+nltk.download('punkt', quiet=True)
+nltk.download('stopwords', quiet=True)
+# Convert m4a audio to wav format
 def convert_to_wav(audio_file):
     audio = AudioSegment.from_file(audio_file, format="m4a")
     wav_file = "temp.wav"
     audio.export(wav_file, format="wav")
     return wav_file
+# Initialize device for torch
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the Whisper speech-recognition pipeline and the T5 summarization model and tokenizer
+whisper_pipeline = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
+summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16).to(device)
+summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
+# Transcribe audio to text
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
+    output = whisper_pipeline(audio_file)
     text = output["text"]
+    output_time = time.time() - start_time
+    result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
     return text, result
+# Clean and preprocess text for summarization
 def clean_text(text):
+    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
+    text = re.sub(r'[^\w\s]', '', text)
+    text = re.sub(r'\s+', ' ', text).strip()
     return text
 def preprocess_text(text):
+    words = word_tokenize(text)
+    stop_words = set(stopwords.words('norwegian'))
+    words = [word for word in words if word.lower() not in stop_words]
+    return ' '.join(words)
+# Summarize text using the T5 model
 def summarize_text(text):
     preprocessed_text = preprocess_text(text)
+    inputs = summarization_tokenizer(preprocessed_text, max_length=1024, return_tensors="pt", truncation=True)
     inputs = inputs.to(device)
     summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
+    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+# Build similarity matrix for graph-based summary
 def build_similarity_matrix(sentences, stop_words):
     similarity_matrix = nx.Graph()
     for i, tokens_a in enumerate(sentences):
                 similarity_matrix.add_edge(i, j, weight=len(common_words))
     return similarity_matrix
+# Graph-based summarization
 def graph_based_summary(text, num_paragraphs=3):
+    sentences = nltk.sent_tokenize(text)
     if len(sentences) < num_paragraphs:
         return sentences
     sentence_tokens = [word_tokenize(sent) for sent in sentences]
     stop_words = set(stopwords.words('norwegian'))
     filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
     scores = nx.pagerank(similarity_matrix)
     ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
+    return ' '.join([sent for _, sent in ranked_sentences[:num_paragraphs]])
+# LexRank summarization
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
     sentences = nltk.sent_tokenize(text)
     if len(sentences) < num_paragraphs:
         return sentences
     stop_words = set(stopwords.words('norwegian'))
     vectorizer = TfidfVectorizer(stop_words=list(stop_words))
     X = vectorizer.fit_transform(sentences)
     similarity_matrix = cosine_similarity(X, X)
+    # Apply threshold to the similarity matrix
+    similarity_matrix[similarity_matrix < threshold] = 0
     nx_graph = nx.from_numpy_array(similarity_matrix)
     scores = nx.pagerank(nx_graph)
     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
+    return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])
+# TextRank summarization
 def text_rank_summary(text, num_paragraphs=3):
     sentences = nltk.sent_tokenize(text)
     if len(sentences) < num_paragraphs:
     X = vectorizer.fit_transform(sentences)
     similarity_matrix = cosine_similarity(X, X)
+    nx_graph = nx.from_numpy_array(similarity_matrix)
+    scores = nx.pagerank(nx_graph)
+    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
+    return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])
+# Save text and summary to PDF
 def save_to_pdf(text, summary):
     pdf = FPDF()
+    pdf.add_page()
     pdf.set_font("Arial", size=12)
     if text:
+        pdf.multi_cell(0, 10, "Text:\n" + text)
+    pdf.ln(10)  # Paragraph space
     if summary:
         pdf.multi_cell(0, 10, "Summary:\n" + summary)
     pdf.output(pdf_output_path)
     return pdf_output_path
+# Gradio Interface
 iface = gr.Blocks()
 with iface:
+    gr.HTML("""
+    <div style="text-align: center;">
+        <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/lol.webp" alt="" width="100%" height="auto">
+    </div>
+    """)
     gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
     with gr.Tabs():
         with gr.TabItem("Transcription"):
             audio_input = gr.Audio(type="filepath")
+            text_output = gr.Textbox(label="Text")
             result_output = gr.Textbox(label="Time taken and Number of words")
             transcribe_button = gr.Button("Transcribe")
+            transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])
+        with gr.TabItem("Summary | Graph-based"):
             summary_output = gr.Textbox(label="Summary | Graph-based")
             summarize_button = gr.Button("Summarize")
+            summarize_button.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output])
+        with gr.TabItem("Summary | LexRank"):
             summary_output = gr.Textbox(label="Summary | LexRank")
             summarize_button = gr.Button("Summarize")
+            summarize_button.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output])
+        with gr.TabItem("Summary | TextRank"):
             summary_output = gr.Textbox(label="Summary | TextRank")
             summarize_button = gr.Button("Summarize")
+            summarize_button.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output])
         with gr.TabItem("Download PDF"):
+            pdf_text_only = gr.Button("Download PDF with Text Only")
             pdf_summary_only = gr.Button("Download PDF with Summary Only")
             pdf_both = gr.Button("Download PDF with Both")
+            pdf_output = gr.File(label="Download PDF")
+            pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
+            pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output], outputs=[pdf_output])
+            pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
 iface.launch(share=True, debug=True)