# -----------------COPY OF NEW EDITION[app.py]----------------- # check if still the case...........??********************************************* # "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results." import time import os import spaces import warnings warnings.filterwarnings("ignore") from pydub import AudioSegment # If m4a audio, convert to wav (Python) def convert_to_wav(audio_file): audio = AudioSegment.from_file(audio_file, format="m4a") wav_file = "temp.wav" audio.export(wav_file, format="wav") return wav_file import torch from transformers import pipeline, AutoProcessor # AutoModelForSpeechSeq2Seq device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') torch_dtype = torch.float32 pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype) # @spaces.GPU(queue=True) # Initialize processor before using it in the function processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large") language = "no" task = "transcribe" def transcribe_audio(audio_file, forced_decoder_ids): if audio_file.endswith(".m4a"): audio_file = convert_to_wav(audio_file) start_time = time.time() forced_decoder_ids = processor.get_decoder_prompt_ids(language="no", task="transcribe") with torch.no_grad(): output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids}) text = output["text"] end_time = time.time() output_time = end_time - start_time word_count = len(text.split()) result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {word_count}" return text, result # [VERSION 3: full-on w/ 3 styles for summarization] import nltk from nltk.tokenize import word_tokenize, sent_tokenize from nltk.corpus import stopwords import networkx as nx from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity import pandas as pd import numpy as np import re nltk.download('punkt') nltk.download('stopwords') WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip())) def clean_text(text): text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE) text = re.sub(r'\', ' ', str(text)) text = re.sub(r'\'', '', str(text)) text = re.sub(r'«', '', str(text)) text = re.sub(r'»', '', str(text)) text = re.sub(r'–', '-', str(text)) text = re.sub(r'…', '.', str(text)) text = re.sub(r'[^\x00-\x7F]+', ' ', str(text)) return text def preprocess_text(text): try: words = word_tokenize(text) stop_words = set(stopwords.words('norwegian')) words_without_stopwords = [word for word in words if word.lower() not in stop_words] processed_text = ' '.join(words_without_stopwords) return processed_text except Exception as e: st.error(f"Error during text preprocessing: {e}") return None from transformers import AutoTokenizer, AutoModelForSeq2SeqLM summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", return_dict=True, torch_dtype=torch.float16) summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base") summarization_model.to(device) # @spaces.GPU(queue=True) def summarize_text(text): preprocessed_text = preprocess_text(text) if preprocessed_text is None: return None inputs = summarization_tokenizer([text], max_length=1024, return_tensors="pt", truncation=True) inputs = inputs.to(device) summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True) summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True) return summary def build_similarity_matrix(sentences, stop_words): similarity_matrix = nx.Graph() for i, tokens_a in enumerate(sentences): for j, tokens_b in enumerate(sentences): if i != j: common_words = set(tokens_a) & set(tokens_b) similarity_matrix.add_edge(i, j, weight=len(common_words)) return similarity_matrix def graph_based_summary(text, num_paragraphs=3): sentences = text.strip().split(".") if len(sentences) < num_paragraphs: return sentences sentence_tokens = [word_tokenize(sent) for sent in sentences] stop_words = set(stopwords.words('norwegian')) filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens] similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words) scores = nx.pagerank(similarity_matrix) ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True) summary = [sent for _, sent in ranked_sentences[:num_paragraphs]] return summary def lex_rank_summary(text, num_paragraphs=3, threshold=0.1): sentences = nltk.sent_tokenize(text) if len(sentences) < num_paragraphs: return sentences stop_words = set(stopwords.words('norwegian')) vectorizer = TfidfVectorizer(stop_words=list(stop_words)) X = vectorizer.fit_transform(sentences) similarity_matrix = cosine_similarity(X, X) for i in range(len(similarity_matrix)): # threshold for j in range(len(similarity_matrix[i])): if similarity_matrix[i][j] < threshold: similarity_matrix[i][j] = 0.0 nx_graph = nx.from_numpy_array(similarity_matrix) scores = nx.pagerank(nx_graph) ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] return summary def text_rank_summary(text, num_paragraphs=3): sentences = nltk.sent_tokenize(text) if len(sentences) < num_paragraphs: return sentences stop_words = set(stopwords.words('norwegian')) vectorizer = TfidfVectorizer(stop_words=list(stop_words)) X = vectorizer.fit_transform(sentences) similarity_matrix = cosine_similarity(X, X) nx_graph = nx.from_numpy_array(similarity_matrix) # graph, nodes (i.e sentences) & edges are similarity scores (is cool) scores = nx.pagerank(nx_graph) # PageRank algorithm, scoring sentences ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary return ' '.join(summary) import gradio as gr from fpdf import FPDF from PIL import Image def save_to_pdf(text, summary): pdf = FPDF() pdf.add_page() pdf.set_font("Arial", size=12) if text: pdf.multi_cell(0, 10, "text:\n" + text) # paragraph space pdf.ln(10) if summary: pdf.multi_cell(0, 10, "Summary:\n" + summary) pdf_output_path = "transcription.pdf" pdf.output(pdf_output_path) return pdf_output_path banner_html = """
Banner
""" iface = gr.Interface( fn=transcribe_audio, inputs=gr.Audio(type="filepath"), outputs=gr.Textbox(label="Transcription"), # Corrected component title="SW Transcription App", description="Upload an audio file to get the text", theme="default", live=False ) iface = gr.Blocks() with iface: gr.HTML(banner_html) gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.") with gr.Tabs(): with gr.TabItem("Transcription"): audio_input = gr.Audio(type="filepath") text_output = gr.Textbox(label="text") result_output = gr.Textbox(label="Time taken and Number of words") transcribe_button = gr.Button("Transcribe") transcribe_button.click( fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output] ) with gr.TabItem("Summary_t1"): summary_output = gr.Textbox(label="Summary | Graph-based") summarize_button = gr.Button("Summarize") def summarize(text): if not text: return "Warning: a text must be available." summary = graph_based_summary(text) return summary summarize_button.click( fn=summarize, inputs=[text_output], outputs=summary_output ) with gr.TabItem("LexRank"): summary_output = gr.Textbox(label="Summary | LexRank") summarize_button = gr.Button("Summarize") def summarize(text): if not text: return "Warning: a text must be available." summary = lex_rank_summary(text) return summary summarize_button.click( fn=summarize, inputs=[text_output], outputs=summary_output ) with gr.TabItem("TextRank"): summary_output = gr.Textbox(label="Summary | TextRank") summarize_button = gr.Button("Summarize") def summarize(text): if not text: return "Warning: a text must be available." summary = text_rank_summary(text) return summary summarize_button.click( fn=summarize, inputs=[text_output], outputs=summary_output ) with gr.TabItem("Download PDF"): pdf_text_only = gr.Button("Download PDF with text Only") pdf_summary_only = gr.Button("Download PDF with Summary Only") pdf_both = gr.Button("Download PDF with Both") pdf_output_text_only = gr.File(label="Download PDF") pdf_output_summary_only = gr.File(label="Download PDF") pdf_output_both = gr.File(label="Download PDF") def generate_pdf_text_only(text): return save_to_pdf(text, "") def generate_pdf_summary_only(summary): return save_to_pdf("", summary) def generate_pdf_both(text, summary): return save_to_pdf(text, summary) pdf_text_only.click( fn=generate_pdf_text_only, inputs=[text_output], outputs=[pdf_output_text_only] ) pdf_summary_only.click( fn=generate_pdf_summary_only, inputs=[summary_output], outputs=[pdf_output_summary_only] ) pdf_both.click( fn=generate_pdf_both, inputs=[text_output, summary_output], outputs=[pdf_output_both] ) iface.launch(share=True, debug=True)