# Captured from a Hugging Face Space whose status was "Build error".
import networkx as nx | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity | |
import nltk | |
nltk.download('punkt') | |
from nltk.tokenize import sent_tokenize | |
import gradio as gr | |
import warnings | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoProcessor, AutoModelForSpeechSeq2Seq # pipeline | |
from pydub import AudioSegment | |
import soundfile as sf | |
import numpy as np | |
from fpdf import FPDF | |
from PIL import Image | |
import time | |
import os | |
import spaces | |
# Suppress every Python warning raised by the libraries loaded below.
warnings.filterwarnings("ignore")

# HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')

# The device must be resolved BEFORE the first `model.to(device)` call;
# using the name earlier raises NameError while the module is imported.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch_dtype = torch.float32

# Speech-to-text checkpoint: the processor prepares audio features and decodes
# token ids, the model generates the token ids.
processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
model.to(device)
def convert_to_wav(audio_file):
    """Re-encode an M4A file as WAV and return the path of the WAV file.

    The input is always decoded as "m4a" and the result is always written to
    "temp.wav" (relative to the current working directory), overwriting any
    previous file with that name.
    """
    output_path = "temp.wav"
    AudioSegment.from_file(audio_file, format="m4a").export(output_path, format="wav")
    return output_path
def _to_mono_16k(samples, sample_rate, target_rate=16000):
    """Return *samples* as a 1-D float32 array sampled at *target_rate* Hz."""
    samples = np.asarray(samples, dtype=np.float32)
    if samples.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files;
        # the speech model takes a single channel, so average them.
        samples = samples.mean(axis=1)
    if sample_rate != target_rate and len(samples) > 0:
        # Linear-interpolation resampling using numpy only (no extra
        # dependency). It applies no anti-aliasing filter.
        target_len = max(1, int(round(len(samples) * target_rate / sample_rate)))
        source_times = np.arange(len(samples)) / sample_rate
        target_times = np.arange(target_len) / target_rate
        samples = np.interp(target_times, source_times, samples).astype(np.float32)
    return samples


def transcribe_audio(audio_file, batch_size=4):
    """Transcribe an audio file in 30-second chunks.

    Args:
        audio_file: Path to the audio file. Paths ending in ".m4a" (any case)
            are first converted with ``convert_to_wav``.
        batch_size: Number of 30-second chunks sent to the model per
            ``generate`` call. Coerced to an integer of at least 1.

    Returns:
        A ``(transcription, result)`` tuple: the stripped transcription text,
        and a report string with the text, elapsed seconds and word count.
        When no file is given, ``("", <warning message>)`` is returned.
    """
    if not audio_file:
        # Clicking "Transcribe" without audio passes None/"" here.
        return "", "Warning: an audio file must be provided."

    start_time = time.time()
    target_rate = 16000
    # UI widgets may deliver floats; range() below needs a positive int step.
    batch_size = max(1, int(batch_size))

    if audio_file.lower().endswith(".m4a"):
        audio_file = convert_to_wav(audio_file)

    audio_input, sample_rate = sf.read(audio_file)
    # The processor is told the audio is 16 kHz, so make that actually true.
    audio_input = _to_mono_16k(audio_input, sample_rate, target_rate)

    chunk_size = target_rate * 30
    chunks = [audio_input[i:i + chunk_size] for i in range(0, len(audio_input), chunk_size)]

    # Never request more tokens than the decoder has positions for, if the
    # model config exposes that limit.
    max_length = min(2048, getattr(model.config, "max_target_positions", 2048))

    pieces = []
    for start in range(0, len(chunks), batch_size):
        batch_chunks = chunks[start:start + batch_size]
        # "max_length" pads every chunk to the feature extractor's full window.
        # Padding only to the longest chunk in the batch leaves a short final
        # batch below the input length the encoder requires.
        inputs = processor(
            batch_chunks,
            sampling_rate=target_rate,
            return_tensors="pt",
            padding="max_length",
        )
        inputs = inputs.to(device)
        attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
        with torch.no_grad():
            output = model.generate(
                inputs.input_features,
                max_length=max_length,
                num_beams=8,
                attention_mask=attention_mask,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id
            )
        pieces.extend(processor.batch_decode(output, skip_special_tokens=True))

    transcription = " ".join(pieces).strip()
    transcription_time = time.time() - start_time
    word_count = len(transcription.split())
    result = f"Transcription: {transcription}\n\nTime taken: {transcription_time:.2f} seconds\nNumber of words: {word_count}"
    return transcription, result
# Summarization model: "t5-base" tokenizer and seq2seq weights, with the model
# moved to the same device as the speech model.
# NOTE(review): the visible summarizer (`summarize_text`) is TextRank-based and
# does not appear to call these objects - confirm they are still needed.
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base").to(device)
# Graph-based summarization | TextRank
def summarize_text(text, top_n=3):
    """Build an extractive summary from the highest-ranked sentences of *text*.

    Sentences are vectorized with TF-IDF, connected in a graph weighted by
    cosine similarity, and scored with PageRank. The *top_n* best-scoring
    sentences are joined with spaces, highest score first.

    Args:
        text: The text to summarize. ``None`` and blank strings yield "".
        top_n: Maximum number of sentences in the summary (default 3).

    Returns:
        The summary string, or "" when there is nothing to summarize.
    """
    if not text or not text.strip():
        return ""
    if top_n <= 0:
        return ""

    sentences = sent_tokenize(text)
    if not sentences:
        return ""

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(sentences)
    except ValueError:
        # Raised by the vectorizer when no sentence yields a usable token
        # ("empty vocabulary"); ranking is impossible, so fall back to the
        # leading sentences instead of crashing the UI callback.
        return " ".join(sentences[:top_n])

    similarity_matrix = cosine_similarity(tfidf_matrix)
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)

    # Sort by (score, sentence) descending; ties fall back to the text itself.
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return " ".join(s for _, s in ranked_sentences[:top_n])
# HTML snippet for the centered banner image shown at the top of the page.
# NOTE(review): the URL uses the Hub's `/raw/` path; the documented file
# endpoint is `/resolve/` - confirm the banner actually renders.
image_html = (
    "\n"
    '<div style="text-align: center;">\n'
    '<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">\n'
    "</div>\n"
)
def _latin1_safe(text):
    """Return *text* with characters outside Latin-1 replaced by "?".

    ``None`` is treated as an empty string.
    """
    return (text or "").encode("latin-1", "replace").decode("latin-1")


def save_to_pdf(transcription, summary):
    """Write the transcription and/or summary to a PDF and return its path.

    Args:
        transcription: Transcription text; empty or ``None`` omits the section.
        summary: Summary text; empty or ``None`` omits the section.

    Returns:
        The path of the written file, always "transcription_summary.pdf".
    """
    # The built-in "Arial" font cannot encode characters outside Latin-1;
    # replace them up front so PDF generation cannot fail on the text.
    transcription = _latin1_safe(transcription)
    summary = _latin1_safe(summary)

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Only emit the sections that have content, so the "... Only" downloads
    # do not carry an empty heading for the part that was left out.
    if transcription:
        pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
    if transcription and summary:
        # Paragraph space between the two sections.
        pdf.ln(10)
    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + summary)

    pdf_output_path = "transcription_summary.pdf"
    pdf.output(pdf_output_path)
    return pdf_output_path
# Gradio UI
#
# One `gr.Blocks` layout with three tabs: transcription, summary and PDF
# download. `iface` is built exactly once here, so no other interface object
# is created and then discarded before launch.


def transcribe(audio_file, batch_size):
    """Click handler: return (transcription, stats report) for the audio file."""
    return transcribe_audio(audio_file, batch_size)


def summarize(transcription):
    """Click handler: summarize the transcription, or warn when it is empty."""
    if not transcription:
        return "Warning: a transcription must be available."
    return summarize_text(transcription)


def generate_pdf_transcription_only(transcription):
    """Click handler: PDF containing only the transcription."""
    return save_to_pdf(transcription, "")


def generate_pdf_summary_only(summary):
    """Click handler: PDF containing only the summary."""
    return save_to_pdf("", summary)


def generate_pdf_both(transcription, summary):
    """Click handler: PDF containing the transcription and the summary."""
    return save_to_pdf(transcription, summary)


iface = gr.Blocks()

with iface:
    gr.HTML(image_html)
    gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")

    with gr.Tabs():
        # First Tab: Transcription
        with gr.TabItem("Transcription"):
            audio_input = gr.Audio(type="filepath")
            # Range starts at 1 and defaults to 4 so the slider can select the
            # same batch size `transcribe_audio` uses by default.
            batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, value=4, label="Batch Size")
            transcription_output = gr.Textbox(label="Transcription | nb-whisper-large-semantic")
            result_output = gr.Textbox(label="Time taken and Number of words")
            transcribe_button = gr.Button("Transcribe")

            transcribe_button.click(
                fn=transcribe,
                inputs=[audio_input, batch_size_input],
                outputs=[transcription_output, result_output]
            )

        # Second Tab: Summary
        with gr.TabItem("Summary"):
            summary_output = gr.Textbox(label="Summary | TextRank, graph-based")
            summarize_button = gr.Button("Summarize")

            summarize_button.click(
                fn=summarize,
                inputs=[transcription_output],  # Use the transcription from the first tab
                outputs=summary_output
            )

        # Third Tab: PDF Download Options
        with gr.TabItem("Download PDF"):
            pdf_transcription_only = gr.Button("Download PDF with Transcription Only")
            pdf_summary_only = gr.Button("Download PDF with Summary Only")
            pdf_both = gr.Button("Download PDF with Both")

            pdf_output_transcription_only = gr.File(label="Download PDF")
            pdf_output_summary_only = gr.File(label="Download PDF")
            pdf_output_both = gr.File(label="Download PDF")

            pdf_transcription_only.click(
                fn=generate_pdf_transcription_only,
                inputs=[transcription_output],
                outputs=[pdf_output_transcription_only]
            )
            pdf_summary_only.click(
                fn=generate_pdf_summary_only,
                inputs=[summary_output],
                outputs=[pdf_output_summary_only]
            )
            pdf_both.click(
                fn=generate_pdf_both,
                inputs=[transcription_output, summary_output],
                outputs=[pdf_output_both]
            )

# run
# NOTE(review): `share=True` requests a public share link; presumably
# unnecessary when the app is hosted on Spaces - confirm.
iface.launch(share=True, debug=True)