### -----------------------------------------------------------------------
### (FULL, Revised) version_1.07ALPHA_app.py
### -----------------------------------------------------------------------
# -------------------------------------------------------------------------
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------
import spaces
import gradio as gr
from PIL import Image
#from pydub import AudioSegment
#from scipy.io import wavfile
import os
import re
import time
import warnings
#import datetime
#import pandas as pd
#import csv
import subprocess
from pathlib import Path
import tempfile
from fpdf import FPDF
import psutil
from gpuinfo import GPUInfo
import numpy as np
import torch
import torchaudio
import torchaudio.transforms as transforms
from transformers import pipeline, AutoModel, AutoTokenizer
import spacy
from spacy.lang.nb.stop_words import STOP_WORDS  # plain `import spacy` does not expose spacy.lang.nb
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
warnings.filterwarnings("ignore")
# ------------header section------------
HEADER_INFO = """
# WEB APP ✨| Norwegian WHISPER Model
Switch Work [Transkribering av lydfiler til norsk skrift]
""".strip()
LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/5112f67899d65e9797a7a60d05f983cf2ceefbe2f7cba74eeca93a4e7061becc?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27logo.png%3B+filename%3D%22logo.png%22%3B&response-content-type=image%2Fpng&Expires=1724881270&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNDg4MTI3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvNTExMmY2Nzg5OWQ2NWU5Nzk3YTdhNjBkMDVmOTgzY2YyY2VlZmJlMmY3Y2JhNzRlZWNhOTNhNGU3MDYxYmVjYz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=ipo8wTjtC7R0QHbo%7Et9Q5CTaI3cZKxM0beajqlApfm5fh7%7EW-FULu1-ISL5bkowBSw9m5RdGoyOqj336OSS5fPD%7EnzYNmAMd3T5bx2-KfCDh6jz0HVECt8S7HeIu%7El2TetxrzL2tdHw4Np4Zpa8JKOnNnje24fF0Nr-xUS2dvPJf54rIL70-iWVXXhw8owxt0%7E1CJsUHC9oibp9B4mZcyWvvRldhDopiQBYELusZdTW3qvtTBK083WP3gHQxadQp8UDVTPZ0g3i112G2NfFJB%7Epa70XeN8m3E6ORx6pVH%7EW6IzjvmapWSF-tmXH-26wYG8aof%7E1U7enbR1w2QBTS-g__&Key-Pair-Id=K24J24Z295AEI9"
SIDEBAR_INFO = f"""
<div align="center">
<img src="{LOGO}" style="width: 100%; height: auto;"/>
</div>
"""
# ------------transcribe section------------
pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
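# chunk_length_s=30 makes the pipeline split long recordings into 30-second
# chunks with overlapping strides, so file length is not limited by the model's
# context window; batch_size in transcribe() below controls how many chunks are
# decoded per forward pass.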
def transcribe(microphone, file_upload, batch_size=15):
    warn_output = ""
    if (microphone is not None) and (file_upload is not None):
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )
    elif (microphone is None) and (file_upload is None):
        # Return a value for both output components (transcription and system info).
        return "ERROR: You have to either use the microphone or upload an audio file", ""
    file = microphone if microphone is not None else file_upload
    start_time = time.time()
    text = pipe(file, batch_size=batch_size, return_timestamps=False)["text"]
    end_time = time.time()
    output_time = end_time - start_time
    word_count = len(text.split())
    # --GPU metrics
    memory = psutil.virtual_memory()
    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0
    # --CPU metric
    cpu_usage = psutil.cpu_percent(interval=1)
    # --system info string
    system_info = f"""
    *Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB.*
    *Processing time: {output_time:.2f} seconds.*
    *Number of words: {word_count}*
    *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*
    *CPU Usage: {cpu_usage}%*
    """
    return warn_output + text.strip(), system_info
# ------------summary section------------
# ------------for app integration later------------
device = "cuda" if torch.cuda.is_available() else "cpu"
nlp = spacy.blank("nb")  # codename 'nb' = Norwegian Bokmål
nlp.add_pipe('sentencizer')
spacy_stop_words = STOP_WORDS
# NOTE: NbAiLab/nb-bert-large is an encoder-only model, so it cannot generate
# abstractive summaries by itself; the tokenizer/model pair is loaded here so
# summarize_text() can be wired up later with a seq2seq-capable checkpoint.
summarization_tokenizer = AutoTokenizer.from_pretrained("NbAiLab/nb-bert-large")
summarization_model = AutoModel.from_pretrained("NbAiLab/nb-bert-large").to(device)
# pipe = pipeline("fill-mask", model="NbAiLab/nb-bert-large")
def clean_text(text):
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
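# Illustrative example of what clean_text does (toy input, not from the app).
# Note that the URL pattern removes everything from "http(s)://" to the end of
# that line, not just the URL itself:
#   clean_text("Se https://nrk.no for mer.\nHei, verden!")
#   -> "Se Hei verden"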
def preprocess_text(text, file_upload):
    if (text is not None) and (file_upload is None):
        doc = nlp(text)
    elif (text is None) and (file_upload is not None):
        doc = nlp(file_upload)
    else:
        return ""
    stop_words = spacy_stop_words
    words = [token.text for token in doc if token.text.lower() not in stop_words]
    return ' '.join(words)
def summarize_text(text, file_upload):
    if (text is not None) and (file_upload is None):
        preprocessed_text = preprocess_text(text, None)
    elif (text is None) and (file_upload is not None):
        preprocessed_text = preprocess_text(None, file_upload)
    else:
        return ""
    # Encode with the tokenizer; the generation step below assumes a
    # seq2seq-capable checkpoint (an encoder-only model such as nb-bert-large
    # does not provide .generate()).
    inputs = summarization_tokenizer(preprocessed_text, max_length=1024, return_tensors="pt", truncation=True)
    inputs = inputs.to(device)
    summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def build_similarity_matrix(sentences):
    similarity_matrix = nx.Graph()
    for i, tokens_a in enumerate(sentences):
        for j, tokens_b in enumerate(sentences):
            if i != j:
                common_words = set(tokens_a) & set(tokens_b)
                similarity_matrix.add_edge(i, j, weight=len(common_words))
    return similarity_matrix
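# Illustrative sketch (toy tokens, not from the app) of the token-overlap graph
# that graph_based_summary() below feeds into PageRank:
#   filtered = [["whisper", "modell", "norsk"],   # sentence 0
#               ["norsk", "tale", "tekst"],       # sentence 1
#               ["whisper", "norsk", "tekst"]]    # sentence 2
#   g = build_similarity_matrix(filtered)
#   # edge weights = shared tokens: (0,1)=1, (0,2)=2, (1,2)=2
#   nx.pagerank(g)  # sentence 2 scores highest (most overlap with the others)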
# PageRank
def graph_based_summary(text, file_upload, num_paragraphs=3):
    if (text is not None) and (file_upload is None):
        doc = nlp(text)
    elif (text is None) and (file_upload is not None):
        doc = nlp(file_upload)
    else:
        return ""
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_paragraphs:
        return ' '.join(sentences)
    sentence_tokens = [nlp(sent) for sent in sentences]
    stop_words = spacy_stop_words
    filtered_tokens = [[token.text for token in tokens if token.text.lower() not in stop_words] for tokens in sentence_tokens]
    similarity_matrix = build_similarity_matrix(filtered_tokens)
    scores = nx.pagerank(similarity_matrix)
    # A sentence that shares no words with any other sentence never enters the graph, hence .get(i, 0).
    ranked_sentences = sorted(((scores.get(i, 0), sent) for i, sent in enumerate(sentences)), reverse=True)
    return ' '.join([sent for _, sent in ranked_sentences[:num_paragraphs]])
def lex_rank_summary(text, file_upload, num_paragraphs=3, threshold=0.1):
    if (text is not None) and (file_upload is None):
        doc = nlp(text)
    elif (text is None) and (file_upload is not None):
        doc = nlp(file_upload)
    else:
        return ""
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_paragraphs:
        return ' '.join(sentences)
    stop_words = spacy_stop_words
    vectorizer = TfidfVectorizer(stop_words=list(stop_words))
    X = vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(X, X)
    # Apply the threshold to the similarity matrix: weak edges are dropped.
    similarity_matrix[similarity_matrix < threshold] = 0
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])
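# Note: text_rank_summary() below uses the same TF-IDF -> cosine-similarity ->
# PageRank pipeline as lex_rank_summary(); the only difference is that it skips
# the thresholding step and keeps every edge of the similarity graph.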
def text_rank_summary(text, file_upload, num_paragraphs=3):
    if (text is not None) and (file_upload is None):
        doc = nlp(text)
    elif (text is None) and (file_upload is not None):
        doc = nlp(file_upload)
    else:
        return ""
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_paragraphs:
        return ' '.join(sentences)
    stop_words = spacy_stop_words
    vectorizer = TfidfVectorizer(stop_words=list(stop_words))
    X = vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(X, X)
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])
def save_to_pdf(text, summary):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    if text:
        pdf.multi_cell(0, 10, "Text:\n" + text)
        pdf.ln(10)  # paragraph spacing
    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + summary)
    pdf_output_path = "transcription_.pdf"
    pdf.output(pdf_output_path)
    return pdf_output_path
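# The built-in "Arial" core font is latin-1 only. That covers Norwegian æ/ø/å,
# but text containing other Unicode characters would need a registered TTF font
# first, e.g. (assumed font file path; exact call depends on the installed fpdf version):
#   pdf.add_font("DejaVu", "", "DejaVuSans.ttf", uni=True)
#   pdf.set_font("DejaVu", size=12)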
iface = gr.Blocks()
with iface:
    gr.HTML(SIDEBAR_INFO)
    gr.Markdown(HEADER_INFO)
    with gr.Row():
        gr.Markdown('''
        ##### Here you will get transcription output
        ##### ''')
        microphone = gr.Audio(sources=["microphone"], type="filepath")
        upload = gr.Audio(sources=["upload"], type="filepath")
    transcribe_btn = gr.Button("Transcribe Interview")
    text_output = gr.Textbox(label="Transcription Output")
    system_info = gr.Textbox(label="System Info")
    transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])
    with gr.Tabs():
        with gr.TabItem("Summary | PageRank"):
            text_input_graph = gr.Textbox(label="Input Text", placeholder="txt2summarize")
            summary_output_graph = gr.Textbox(label="PageRank | token-based similarity")
            gr.Markdown("""
            **token-based**: edge weights in the similarity matrix represent token overlap;
            sentences are ranked by their centrality in the graph (good with dense inter-sentence relationships)
            """)
            gr.Markdown("""
            *Bjørn*: **gir sammendrag som fanger opp de mest relevante setningene i teksten**
            """)
            summarize_transcribed_button_graph = gr.Button("Summary of Transcribed Text, Click Here")
            summarize_transcribed_button_graph.click(fn=lambda text: graph_based_summary(text, None), inputs=[text_output], outputs=[summary_output_graph])
            summarize_uploaded_button_graph = gr.Button("Upload Text to Summarize, Click Here")
            summarize_uploaded_button_graph.click(fn=lambda text: graph_based_summary(text, None), inputs=[text_input_graph], outputs=[summary_output_graph])
with gr.TabItem("Summary | LexRank"): | |
text_output = gr.Textbox(label="Transcription Output") | |
text_input_lex = gr.Textbox(label="Input Text", placeholder="txt2summarize") | |
summary_output_lex = gr.Textbox(label="LexRank | cosine similarity") | |
gr.Markdown(""" | |
**semantic**: TF-IDF vectorization@cosine similarity matrix, ranked by eigenvector centrality. | |
(good for sparse graph structures with thresholding) | |
""") | |
gr.Markdown(""" | |
*Bjørn*: **gir sammendrag som best fanger opp betydningen av hele teksten** | |
""") | |
summarize_transcribed_button_lex = gr.Button("Summary of Transcribed Text, Click Here") | |
summarize_transcribed_button_lex.click(fn=lambda text: lex_rank_summary(text), inputs=[transcribed_text], outputs=[summary_output_lex]) | |
summarize_uploaded_button_lex = gr.Button("Upload Text to Summarize, Click Here") | |
summarize_uploaded_button_lex.click(fn=lex_rank_summary(file_upload), inputs=[text_input_lex], outputs=[summary_output_lex]) | |
with gr.TabItem("Summary | TextRank"): | |
text_input_text_rank = gr.Textbox(label="Input Text", placeholder="txt2summarize") | |
summary_output_text_rank = gr.Textbox(label="TextRank | lexical similarity") | |
gr.Markdown(""" | |
**sentence**: graph with weighted edges based on lexical similarity. (i.e" "sentence similarity"word overlap)/sentence similarity | |
""") | |
gr.Markdown(""" | |
*Bjørn*: **sammendrag basert på i de setningene som ligner mest på hverandre fra teksten** | |
""") | |
summarize_transcribed_button_text_rank = gr.Button("Summary of Transcribed Text, Click Here") | |
summarize_transcribed_button_text_rank.click(fn=lambda text: text_rank_summary(text), inputs=[transcribed_text], outputs=[summary_output_text_rank]) | |
summarize_uploaded_button_text_rank = gr.Button("Upload Text to Summarize, Click Here") | |
summarize_uploaded_button_text_rank.click(fn=text_rank_summary(file_upload), inputs=[text_input_text_rank], outputs=[summary_output_text_rank]) | |
with gr.TabItem("Download PDF"): | |
pdf_text_only = gr.Button("Download PDF with Transcribed Text Only") | |
pdf_summary_only = gr.Button("Download PDF with Summary-of-Transcribed-Text Only") | |
pdf_both = gr.Button("Download PDF with Both") | |
pdf_output = gr.File(label="Download PDF") | |
pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[transcribed_text], outputs=[pdf_output]) | |
pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output_graph, summary_output_lex, summary_output_text_rank], outputs=[pdf_output]) # Includes all summary outputs | |
pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[transcribed_text, summary_output_graph], outputs=[pdf_output]) | |