Update app.py
app.py
CHANGED
@@ -22,6 +22,7 @@ import warnings
 from pydub import AudioSegment
 import torch
 import torchaudio
+import torchaudio.transforms as transforms
 from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
 from huggingface_hub import model_info
 import spacy
@@ -75,9 +76,15 @@ def transcribe_audio(audio_file, chunk_length_s=30):
 
     start_time = time.time()
 
-    # Load
+    # Load the waveform with torchaudio
     waveform, sample_rate = torchaudio.load(audio_file)
 
+    # Resample to 16 kHz if the source rate differs (e.g. 32 kHz)
+    if sample_rate != 16000:
+        resampler = transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+        waveform = resampler(waveform)
+        sample_rate = 16000
+
     # Calculate the number of chunks
     chunk_size = chunk_length_s * sample_rate
     num_chunks = waveform.shape[1] // chunk_size + int(waveform.shape[1] % chunk_size != 0)
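Reviewer note: the new branch fixes the sample rate, but Whisper-family checkpoints also expect mono input; if uploaded files can be stereo, a channel downmix is needed as well. A minimal sketch of a combined loader (the mono handling is an assumption, not part of this commit; the ceil-division for num_chunks above is equivalent to math.ceil):

import torch
import torchaudio
import torchaudio.transforms as transforms

def load_audio_16k(path: str) -> torch.Tensor:
    # Load at the native rate, then normalize to 16 kHz mono.
    waveform, sample_rate = torchaudio.load(path)
    if waveform.shape[0] > 1:  # assumed stereo handling: average the channels
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != 16000:
        waveform = transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
    return waveform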
@@ -107,12 +114,12 @@ def transcribe_audio(audio_file, chunk_length_s=30):
         chunk_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
         full_text.append(chunk_text)
 
-    # Combine
+    # Combine the transcribed text from all chunks
    text = " ".join(full_text)
 
     output_time = time.time() - start_time
 
-    #
+    # Audio duration in seconds
     audio_duration = waveform.shape[1] / sample_rate
 
     # Real-time Factor (RTF)
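Reviewer note: the RTF formula itself falls outside this hunk; by the usual convention it is processing time divided by audio duration. A sketch assuming that convention, reusing the names from the surrounding code:

rtf = output_time / audio_duration  # < 1.0 means faster than real time
result = f"Processed {audio_duration:.1f}s of audio in {output_time:.1f}s (RTF {rtf:.2f})"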
@@ -132,26 +139,26 @@ def transcribe_audio(audio_file, chunk_length_s=30):
     return text, result
 
 
-# Clean and preprocess
+# Clean and preprocess text for summarization
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
     text = re.sub(r'[^\w\s]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 
-nlp = spacy.blank("nb")  # 'nb'
+nlp = spacy.blank("nb")  # 'nb' = Norwegian Bokmål
 spacy_stop_words = spacy.lang.nb.stop_words.STOP_WORDS
 
 def preprocess_text(text):
     # Process the text with SpaCy
     doc = nlp(text)
-    #
+    # Use SpaCy's stop words directly
     stop_words = spacy_stop_words
     # Filter out stop words
     words = [token.text for token in doc if token.text.lower() not in stop_words]
     return ' '.join(words)
 
-# Summarize
+# Summarize with the T5 model
 def summarize_text(text):
     preprocessed_text = preprocess_text(text)
     inputs = summarization_tokenizer(preprocessed_text, max_length=1024, return_tensors="pt", truncation=True)
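Reviewer note: the URL pattern r'https?:\/\/.*[\r\n]*' is greedy, so it deletes everything from the URL to the end of the line, not just the URL itself. A tighter pattern stops at the first whitespace (a sketch, not part of this commit):

import re

def clean_text(text):
    text = re.sub(r'https?://\S+', '', text)  # drop only the URL token
    text = re.sub(r'[^\w\s]', '', text)       # strip punctuation
    return re.sub(r'\s+', ' ', text).strip()

print(clean_text("Se https://example.com for mer info!"))  # -> "Se for mer info"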
@@ -159,7 +166,7 @@ def summarize_text(text):
     summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
     return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
-#
+# Builds the sentence similarity matrix
 def build_similarity_matrix(sentences, stop_words):
     similarity_matrix = nx.Graph()
     for i, tokens_a in enumerate(sentences):
@@ -169,7 +176,7 @@ def build_similarity_matrix(sentences, stop_words):
             similarity_matrix.add_edge(i, j, weight=len(common_words))
     return similarity_matrix
 
-# Graph-based summarization
+# Graph-based summarization
 def graph_based_summary(text, num_paragraphs=3):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
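Reviewer note: these helpers feed a PageRank-style ranking over the word-overlap graph. A self-contained toy illustration of how graph scores rank sentence indices (networkx only, not the app's code):

import networkx as nx

# Toy graph over three sentences (indices 0-2); edge weight = number of
# shared words, mirroring the structure build_similarity_matrix produces.
g = nx.Graph()
g.add_edge(0, 1, weight=2)
g.add_edge(1, 2, weight=1)
scores = nx.pagerank(g)             # {sentence index: centrality score}
print(max(scores, key=scores.get))  # index of the most central sentence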
@@ -185,7 +192,7 @@ def graph_based_summary(text, num_paragraphs=3):
     ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
     return ' '.join([sent for _, sent in ranked_sentences[:num_paragraphs]])
 
-# LexRank
+# LexRank
 def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
@@ -197,14 +204,14 @@ def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
     X = vectorizer.fit_transform(sentences)
     similarity_matrix = cosine_similarity(X, X)
 
-    # Apply threshold
+    # Apply the threshold to the similarity matrix
     similarity_matrix[similarity_matrix < threshold] = 0
     nx_graph = nx.from_numpy_array(similarity_matrix)
     scores = nx.pagerank(nx_graph)
     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
     return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])
 
-# TextRank
+# TextRank
 def text_rank_summary(text, num_paragraphs=3):
     doc = nlp(text)
     sentences = [sent.text for sent in doc.sents]
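Reviewer note: both lex_rank_summary and text_rank_summary index ranked_sentences with range(num_paragraphs), which raises IndexError whenever the text has fewer sentences than the requested paragraph count. A defensive replacement for the return statement (sketch):

top_n = min(num_paragraphs, len(ranked_sentences))  # never over-index
return ' '.join(ranked_sentences[i][1] for i in range(top_n))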
@@ -222,7 +229,7 @@ def text_rank_summary(text, num_paragraphs=3):
     return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])
 
 
-# Save text
+# Save text and summary to PDF
 def save_to_pdf(text, summary):
     pdf = FPDF()
     pdf.add_page()
@@ -243,7 +250,7 @@ def save_to_pdf(text, summary):
 iface = gr.Blocks()
 
 with iface:
-    gr.
+    gr.HTML('<img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/banner_trans.png" alt="Banner Image" />')
     gr.Markdown("*Switch Work's JoJo-versjon som webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download*")
 
     with gr.Tabs():
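Reviewer note: Hub /blob/main/ URLs serve the HTML file viewer, not the raw file, so this banner will not render inside an <img> tag; /resolve/main/ serves the asset itself. Suggested follow-up (same asset, raw URL):

gr.HTML('<img src="https://huggingface.co/spaces/camparchimedes/transcription_app/resolve/main/banner_trans.png" alt="Banner Image" />')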