nb / app.py
camparchimedes's picture
Update app.py
dee4184 verified
raw
history blame
7.21 kB
### -----------------------------------------------------------------------
### (BASE, Revised) version_1.07 ALPHA, app.py
### -----------------------------------------------------------------------
# -------------------------------------------------------------------------
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------
import spaces
import gradio as gr
from PIL import Image
#from pydub import AudioSegment
#from scipy.io import wavfile
import os
import re
import time
import warnings
#import datetime
#import pandas as pd
#import csv
import subprocess
from pathlib import Path
import tempfile
from fpdf import FPDF
import psutil
from gpuinfo import GPUInfo
#import numpy as np
import torch
#import torchaudio
#import torchaudio.transforms as transforms
from transformers import pipeline #AutoModel
#import spacy
#import networkx as nx
#from sklearn.feature_extraction.text import TfidfVectorizer
#from sklearn.metrics.pairwise import cosine_similarity
warnings.filterwarnings("ignore")
# ------------header section------------
# Markdown title shown at the top of the page (rendered with gr.Markdown).
HEADER_INFO = """
# SWITCHVOX ✨|🇳🇴 *Transkribering av lydfiler til norsk skrift*
""".strip()
# URL of the banner image that is interpolated into SIDEBAR_INFO below.
# NOTE(review): the query string carries Expires=1725145079, Policy= and
# Signature= parameters, so this looks like a time-limited signed CDN link.
# Presumably it stops resolving once that epoch time has passed -- confirm,
# and replace it with a stable URL if so.
LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"
# HTML snippet (rendered with gr.HTML) that centers the banner image and
# stretches it to the full width of its container.
SIDEBAR_INFO = f"""
<div align="center">
<img src="{LOGO}" style="width: 100%; height: auto;"/>
</div>
"""
@spaces.GPU(duration=120)
def transcribe(microphone, file_upload):
    """Transcribe an audio file to text with the NB-Whisper Large model.

    Args:
        microphone: File path of a microphone recording, or None. Takes
            precedence over ``file_upload`` when both are provided.
        file_upload: File path of an uploaded audio file, or None.

    Returns:
        A ``(text, system_info)`` tuple: the transcribed text and a short
        summary string with the elapsed processing time and the word count.

    Raises:
        gr.Error: If neither a recording nor an upload was provided.
    """
    audio_path = microphone if microphone is not None else file_upload
    if audio_path is None:
        # Surface a readable message in the UI instead of handing None to
        # the ASR pipeline and failing with an opaque traceback.
        raise gr.Error(
            "Ingen lydfil funnet. Last opp en lydfil eller bruk mikrofonen."
        )

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # NOTE(review): the pipeline is built on every call, so the reported
    # processing time includes model loading. Long recordings may also need
    # chunking (e.g. chunk_length_s) -- confirm against the pipeline docs.
    asr = pipeline(
        "automatic-speech-recognition",
        model="NbAiLab/nb-whisper-large",
        device=device,
    )
    text = asr(audio_path)["text"]
    elapsed = time.perf_counter() - started

    word_count = len(text.split())

    # CPU/GPU/memory metrics are deliberately not gathered: the summary does
    # not report them, and psutil.cpu_percent(interval=1) would add a
    # one-second stall to every request.
    system_info = (
        "\n"
        f"Processing time: {elapsed:.2f} seconds.\n"
        f"Number of words: {word_count}\n"
    )
    return text, system_info
def save_to_pdf(text, summary):
    """Write the transcription (and an optional summary) to a PDF file.

    Args:
        text: Transcribed text; its section is skipped when empty.
        summary: Summary text; its section is skipped when empty.

    Returns:
        Path (as ``str``) of the generated ``transcription_.pdf``. The file
        is placed in a fresh temporary directory so that concurrent requests
        do not overwrite each other's output.
    """

    def _latin1_safe(value):
        # The core "Arial" font only covers Latin-1. Characters outside that
        # range (typographic quotes, dashes, emoji, ...) would make PDF
        # generation fail, so they are replaced with "?" instead.
        return value.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    if text:
        pdf.multi_cell(0, 10, "Transkribert Tekst:\n" + _latin1_safe(text))

    pdf.ln(10)  # Vertical gap between the two sections

    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + _latin1_safe(summary))

    # Keep the original file name (it is what the user downloads) but give
    # every call its own directory.
    out_dir = Path(tempfile.mkdtemp(prefix="transcription_"))
    pdf_output_path = str(out_dir / "transcription_.pdf")
    pdf.output(pdf_output_path)
    return pdf_output_path
# Custom stylesheet handed to gr.Blocks(css=css). The two selectors target the
# textareas of the components created with elem_id="transcription_output"
# and elem_id="system_info_box".
css = """
#transcription_output textarea {
background-color: #000000; /* black */
color: #00FF00 !important; /* text color */
font-size: 16px; /* font size */
}
#system_info_box textarea {
background-color: #ffe0b3; /* orange */
color: black !important; /* text color */
font-size: 14px; /* font size */
}
"""
# ------------user interface------------
# Page layout: banner and title, an input row (instructions, microphone,
# upload, transcribe button), the result boxes, a PDF download tab and a
# footer with badges.
iface = gr.Blocks(css=css)

with iface:
    gr.HTML(SIDEBAR_INFO)
    gr.Markdown(HEADER_INFO)

    with gr.Row():
        gr.Markdown('''
##### 🔊 Last opp lydfila
##### ☕️ Trykk på "Transkriber" knappen og vent på svar
##### ⚡️ Går rimelig bra kjapt med Norwegian NB-Whisper Large..
##### 😅 Planlegger tilleggs-funksjoner senere
##### 🎤 Bruk av mikrofon mulig (*ikke testet*)
''')
        microphone = gr.Audio(sources="microphone", type="filepath")
        upload = gr.Audio(sources="upload", type="filepath")
        # The label previously lacked its closing quote, which made the whole
        # file fail to parse.
        transcribe_btn = gr.Button("Transkriber")

    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Transkribert Tekst", elem_id="transcription_output")
            system_info = gr.Textbox(label="Antall sekunder, ord:", elem_id="system_info_box")

    with gr.Tabs():
        with gr.TabItem("Download PDF"):
            pdf_text_only = gr.Button("Last ned pdf med resultat")
            pdf_output = gr.File(label="/.docx?")
            # Only the transcription is exported; the summary section is
            # left empty.
            pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])

    with gr.Row():
        gr.Markdown('''
<div align="center">
<a href="https://opensource.com/resources/what-open-source">
<img src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" alt="Open Source? Yes!">
</a>
<span style="display:inline-block; width: 20px;"></span>
<a href="https://opensource.org/licenses/Apache-2.0">
<img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License: Apache 2.0">
</a>
</div>
''')

    # Wire the button to the transcription function: both audio inputs go
    # in, the text and the timing/word-count summary come out.
    transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])

iface.launch(share=True, debug=True)