Spaces:

camparchimedes
/

nb

Build error

App Files Files

nb / app.py

camparchimedes

Update app.py

dee4184 verified 9 months ago

raw

history blame

7.21 kB

	### -----------------------------------------------------------------------
	### (BASE, Revised) version_1.07 ALPHA, app.py
	### -----------------------------------------------------------------------

	# -------------------------------------------------------------------------
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# -------------------------------------------------------------------------

	import spaces
	import gradio as gr
	from PIL import Image
	#from pydub import AudioSegment
	#from scipy.io import wavfile

	import os
	import re
	import time
	import warnings
	#import datetime
	#import pandas as pd
	#import csv
	import subprocess
	from pathlib import Path
	import tempfile
	from fpdf import FPDF

	import psutil
	from gpuinfo import GPUInfo

	#import numpy as np
	import torch
	#import torchaudio
	#import torchaudio.transforms as transforms

	from transformers import pipeline #AutoModel

	#import spacy
	#import networkx as nx
	#from sklearn.feature_extraction.text import TfidfVectorizer
	#from sklearn.metrics.pairwise import cosine_similarity

	warnings.filterwarnings("ignore")

	# ------------header section------------
	# Markdown heading shown at the top of the page. The backslash in front of
	# the pipe is spelled as an explicit "\\" so the literal contains no invalid
	# escape sequence; the resulting string value is unchanged.
	HEADER_INFO = "# SWITCHVOX ✨\\|🇳🇴 Transkribering av lydfiler til norsk skrift"
	# Banner image URL embedded in the page header.
	# NOTE(review): the URL carries `Expires=1725145079`, `Policy=` and
	# `Signature=` query parameters, so it looks like a time-limited signed
	# link -- confirm it still resolves, or point LOGO at a stable asset URL.
	LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"
	# HTML snippet that centres the banner and lets it span the full width;
	# it is passed to gr.HTML when the interface is built.
	SIDEBAR_INFO = f"""
	<div align="center">
	<img src="{LOGO}" style="width: 100%; height: auto;"/>
	</div>
	"""

	@spaces.GPU(duration=120)
	def transcribe(microphone, file_upload):
	    """Transcribe an audio recording to text with NB-Whisper Large.

	    Args:
	        microphone: Value of the microphone audio input, or ``None`` when
	            nothing was recorded. Takes precedence over ``file_upload``.
	        file_upload: Value of the upload audio input, or ``None``. Used
	            only when ``microphone`` is ``None``.

	    Returns:
	        A ``(text, system_info)`` tuple: the transcription and a short
	        report holding the elapsed processing time and the word count.

	    Raises:
	        gr.Error: If neither a recording nor an upload was supplied.
	    """
	    file = microphone if microphone is not None else file_upload
	    if file is None:
	        # Surface a readable message in the UI instead of handing None to
	        # the pipeline and failing with an opaque error.
	        raise gr.Error("Ingen lydfil valgt. Last opp en fil eller bruk mikrofonen.")

	    start_time = time.time()

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    # NOTE(review): the pipeline is rebuilt on every call, so the measured
	    # time includes model loading; consider caching it if that is safe on
	    # the hosting setup.
	    pipe = pipeline(
	        "automatic-speech-recognition",
	        model="NbAiLab/nb-whisper-large",
	        device=device,
	        # Chunk long recordings so audio beyond a single 30 s Whisper
	        # window is transcribed as well.
	        chunk_length_s=30,
	    )
	    text = pipe(file)["text"]

	    output_time = time.time() - start_time
	    word_count = len(text.split())

	    system_info = (
	        f"\nProcessing time: {output_time:.2f} seconds.\n"
	        f"Number of words: {word_count}\n"
	    )
	    return text, system_info

	def save_to_pdf(text, summary):
	    """Write the transcription, and optionally a summary, to a PDF file.

	    Args:
	        text: Transcribed text; its section is skipped when empty.
	        summary: Summary text; its section is skipped when empty.

	    Returns:
	        Path (as ``str``) of the PDF that was written.
	    """

	    def _latin1_safe(value):
	        # The built-in "Arial" font only covers Latin-1. Replace any other
	        # character with "?" so PDF generation does not fail on it.
	        return value.encode("latin-1", "replace").decode("latin-1")

	    pdf = FPDF()
	    pdf.add_page()
	    pdf.set_font("Arial", size=12)

	    if text:
	        pdf.multi_cell(0, 10, _latin1_safe("Transkribert Tekst:\n" + text))

	    pdf.ln(10)  # vertical gap between the two sections

	    if summary:
	        pdf.multi_cell(0, 10, _latin1_safe("Summary:\n" + summary))

	    # Use a fresh directory per call so simultaneous requests cannot
	    # overwrite each other's file, while keeping the download file name.
	    pdf_output_path = str(Path(tempfile.mkdtemp()) / "transcription_.pdf")
	    pdf.output(pdf_output_path)
	    return pdf_output_path

	# Custom stylesheet handed to gr.Blocks(css=css). The two selectors target
	# the textareas of the Textboxes created with
	# elem_id="transcription_output" and elem_id="system_info_box".
	css = """
	#transcription_output textarea {
	background-color: #000000; /* black */
	color: #00FF00 !important; /* text color */
	font-size: 16px; /* font size */
	}

	#system_info_box textarea {
	background-color: #ffe0b3; /* orange */
	color: black !important; /* text color */
	font-size: 14px; /* font size */
	}
	"""

	# Build the Gradio interface: banner, instructions and audio inputs, the
	# result boxes, a PDF download tab and a footer with badges.
	# NOTE(review): the nesting of the layout blocks below is reconstructed from
	# statement order -- confirm it matches the intended page layout.
	iface = gr.Blocks(css=css)

	with iface:

	    gr.HTML(SIDEBAR_INFO)
	    gr.Markdown(HEADER_INFO)

	    # Instructions, the two audio inputs and the button that starts the job.
	    with gr.Row():
	        gr.Markdown('''
	        ##### 🔊 Last opp lydfila
	        ##### ☕️ Trykk på "Transkriber" knappen og vent på svar
	        ##### ⚡️ Går rimelig bra kjapt med Norwegian NB-Whisper Large..
	        ##### 😅 Planlegger tilleggs-funksjoner senere
	        ##### 🎤 Bruk av mikrofon mulig (ikke testet)
	        ''')
	        microphone = gr.Audio(sources="microphone", type="filepath")
	        upload = gr.Audio(sources="upload", type="filepath")
	        transcribe_btn = gr.Button("Transkriber")

	    # Output boxes; their elem_id values are the hooks used by the CSS.
	    with gr.Row():
	        with gr.Column(scale=3):
	            text_output = gr.Textbox(label="Transkribert Tekst", elem_id="transcription_output")
	            system_info = gr.Textbox(label="Antall sekunder, ord:", elem_id="system_info_box")

	    with gr.Tabs():
	        with gr.TabItem("Download PDF"):
	            pdf_text_only = gr.Button("Last ned pdf med resultat")
	            pdf_output = gr.File(label="/.docx?")

	            # Export only the transcription; no summary is passed.
	            pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])

	    with gr.Row():
	        gr.Markdown('''
	        <div align="center">
	        <a href="https://opensource.com/resources/what-open-source">
	        <img src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" alt="Open Source? Yes!">
	        </a>
	        <span style="display:inline-block; width: 20px;"></span>
	        <a href="https://opensource.org/licenses/Apache-2.0">
	        <img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License: Apache 2.0">
	        </a>
	        </div>
	        ''')

	    # Event listeners must be registered inside the Blocks context.
	    transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])



	iface.launch(share=True, debug=True)