Spaces:

camparchimedes
/

nb

Build error

App Files Files

nb / app.py

camparchimedes

Update app.py

04f2c63 verified about 1 year ago

raw

history blame

6.14 kB

	### -----------------------------------------------------------------------
	### (FULL, Revised) version_1.07ALPHA_app.py
	### -----------------------------------------------------------------------

	# -------------------------------------------------------------------------
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# -------------------------------------------------------------------------

	import spaces
	import gradio as gr
	from PIL import Image
	#from pydub import AudioSegment
	#from scipy.io import wavfile

	import os
	import re
	import time
	import warnings
	#import datetime
	#import pandas as pd
	#import csv
	import subprocess
	from pathlib import Path
	import tempfile
	from fpdf import FPDF

	import psutil
	from gpuinfo import GPUInfo

	#import numpy as np
	import torch
	#import torchaudio
	#import torchaudio.transforms as transforms

	from transformers import pipeline #AutoModel

	#import spacy
	#import networkx as nx
	#from sklearn.feature_extraction.text import TfidfVectorizer
	#from sklearn.metrics.pairwise import cosine_similarity

	# Install a blanket "ignore" filter: no category/module is given, so every
	# warning raised after this point is suppressed.
	warnings.filterwarnings("ignore")

	# ------------header section------------
	# Markdown shown at the top of the page. The backslash before the pipe is
	# written as "\\|" so the literal keeps the same two characters (backslash,
	# pipe) without relying on an invalid string escape sequence.
	HEADER_INFO = """
	# WEB APP ✨\\| Norwegian WHISPER Model
	Switch Work [Transkribering av lydfiler til norsk skrift]
	""".strip()
	# Logo image URL embedded in the sidebar HTML below.
	# NOTE(review): this is a signed CDN link whose query string carries
	# `Expires=1724881270`; presumably it stops resolving after that time --
	# confirm and consider pointing at a stable location instead.
	LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/5112f67899d65e9797a7a60d05f983cf2ceefbe2f7cba74eeca93a4e7061becc?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27logo.png%3B+filename%3D%22logo.png%22%3B&response-content-type=image%2Fpng&Expires=1724881270&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNDg4MTI3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvNTExMmY2Nzg5OWQ2NWU5Nzk3YTdhNjBkMDVmOTgzY2YyY2VlZmJlMmY3Y2JhNzRlZWNhOTNhNGU3MDYxYmVjYz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=ipo8wTjtC7R0QHbo%7Et9Q5CTaI3cZKxM0beajqlApfm5fh7%7EW-FULu1-ISL5bkowBSw9m5RdGoyOqj336OSS5fPD%7EnzYNmAMd3T5bx2-KfCDh6jz0HVECt8S7HeIu%7El2TetxrzL2tdHw4Np4Zpa8JKOnNnje24fF0Nr-xUS2dvPJf54rIL70-iWVXXhw8owxt0%7E1CJsUHC9oibp9B4mZcyWvvRldhDopiQBYELusZdTW3qvtTBK083WP3gHQxadQp8UDVTPZ0g3i112G2NfFJB%7Epa70XeN8m3E6ORx6pVH%7EW6IzjvmapWSF-tmXH-26wYG8aof%7E1U7enbR1w2QBTS-g__&Key-Pair-Id=K24J24Z295AEI9"
	# HTML snippet that centres the logo and lets it scale to the container width;
	# LOGO is interpolated into the <img> tag's src attribute.
	SIDEBAR_INFO = f"""
	<div align="center">
	<img src="{LOGO}" style="width: 100%; height: auto;"/>
	</div>
	"""

	# ------------transcribe section------------

	# Speech-recognition pipeline, created once when the module is imported and
	# reused by transcribe() below. Audio is processed in 30-second chunks and
	# generation is pinned to Norwegian transcription.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model="NbAiLab/nb-whisper-large",
	    chunk_length_s=30,
	    generate_kwargs=dict(task="transcribe", language="no"),
	)

	@spaces.GPU()
	def transcribe(microphone, file_upload, batch_size=15):
	    """Transcribe one audio input and report timing and resource usage.

	    Args:
	        microphone: Value of the microphone audio component (a file path,
	            since the component is declared with ``type="filepath"``), or
	            ``None`` when nothing was recorded.
	        file_upload: Value of the upload audio component, or ``None`` when
	            nothing was uploaded.
	        batch_size: Batch size forwarded to the speech-recognition pipeline.

	    Returns:
	        A ``(text, system_info)`` tuple. ``text`` is the stripped
	        transcription, prefixed with a warning when both inputs were given
	        (the microphone recording wins). When neither input is given,
	        ``text`` is an error message and ``system_info`` is empty.
	    """
	    # The click handler feeds two output components, so every return path
	    # must yield two values -- including the "no input" error path.
	    if microphone is None and file_upload is None:
	        return "ERROR: You have to either use the microphone or upload an audio file", ""

	    warn_output = ""
	    if microphone is not None and file_upload is not None:
	        warn_output = (
	            "WARNING: You've uploaded an audio file and used the microphone. "
	            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
	        )

	    # Microphone input takes precedence over the uploaded file.
	    audio_path = microphone if microphone is not None else file_upload

	    start_time = time.time()
	    text = pipe(audio_path, batch_size=batch_size, return_timestamps=False)["text"]
	    output_time = time.time() - start_time
	    word_count = len(text.split())

	    # --GPU metrics (first device only; fall back to 0 when none is reported)
	    memory = psutil.virtual_memory()
	    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
	    gpu_utilization = gpu_utilization[0] if gpu_utilization else 0
	    gpu_memory = gpu_memory[0] if gpu_memory else 0

	    # --CPU metric (samples for one second, so this call blocks that long)
	    cpu_usage = psutil.cpu_percent(interval=1)

	    # --system info string
	    bytes_per_gb = 1024 * 1024 * 1024
	    system_info = f"""
	Memory: {memory.total / bytes_per_gb:.2f}GB, used: {memory.percent}%, available: {memory.available / bytes_per_gb:.2f}GB.*
	Processing time: {output_time:.2f} seconds.
	Number of words: {word_count}
	GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}
	CPU Usage: {cpu_usage}%
	"""

	    return warn_output + text.strip(), system_info


	def save_to_pdf(text, summary):
	    """Render the transcription (and optional summary) into a PDF file.

	    Args:
	        text: Transcribed text; the "Text:" section is skipped when empty.
	        summary: Summary text; the "Summary:" section is skipped when empty.

	    Returns:
	        Path of the PDF that was written. Each call writes to its own
	        temporary file so simultaneous requests cannot overwrite each
	        other's output.
	    """

	    def _printable(value):
	        # The built-in "Arial" font only covers Latin-1; characters outside
	        # that range are replaced with "?" instead of aborting the export.
	        return value.encode("latin-1", "replace").decode("latin-1")

	    pdf = FPDF()
	    pdf.add_page()
	    pdf.set_font("Arial", size=12)
	    # TODO: add the same if/elif input validation used in transcribe() here.
	    if text:
	        pdf.multi_cell(0, 10, "Text:\n" + _printable(text))

	    pdf.ln(10)  # vertical gap between the two sections

	    if summary:
	        pdf.multi_cell(0, 10, "Summary:\n" + _printable(summary))

	    # Reserve a unique file name; delete=False keeps the file on disk after
	    # the handle is closed so FPDF can write to it and the caller can serve it.
	    with tempfile.NamedTemporaryFile(
	        prefix="transcription_", suffix=".pdf", delete=False
	    ) as handle:
	        pdf_output_path = handle.name
	    pdf.output(pdf_output_path)
	    return pdf_output_path


	# ------------user interface------------
	# Builds the Gradio Blocks layout and wires the two buttons to their handlers.
	# NOTE(review): the nesting below has lost its indentation in this copy of the
	# file; the statements after each `with` are presumably its body -- confirm
	# against the original layout before editing.
	iface = gr.Blocks()

	with iface:

	# Logo banner followed by the Markdown page title.
	gr.HTML(SIDEBAR_INFO)
	gr.Markdown(HEADER_INFO)

	with gr.Row():
	gr.Markdown('''
	##### Here you will get transcription output
	##### ''')
	# Two alternative audio inputs; both hand a file path to the handler.
	microphone = gr.Audio(sources="microphone", type="filepath")
	upload = gr.Audio(sources="upload", type="filepath")

	# Trigger button plus the two text boxes that receive transcribe()'s results.
	transcribe_btn = gr.Button("Transcribe Interview")
	text_output = gr.Textbox()
	system_info = gr.Textbox(label="System Info")

	# transcribe() receives (microphone, upload) and fills both text boxes.
	transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])

	with gr.Tabs():
	with gr.TabItem("Download PDF"):
	pdf_text_only = gr.Button("Download PDF with Transcribed Text")
	pdf_output = gr.File(label="Download PDF")

	# Export only the transcription: the summary argument is passed as "".
	pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])