import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
import gradio as gr
import warnings
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from pydub import AudioSegment
from fpdf import FPDF
from PIL import Image
import time
import os
# import spaces
warnings.filterwarnings("ignore")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch_dtype = torch.float32
# ASR pipeline (Norwegian Whisper model)
pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic", device=device, torch_dtype=torch_dtype)
# Convert m4a files to wav (pydub requires ffmpeg)
def convert_to_wav(audio_file):
    audio = AudioSegment.from_file(audio_file, format="m4a")
    wav_file = "temp.wav"
    audio.export(wav_file, format="wav")
    return wav_file
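# A more general variant (sketch only, not used above): pydub/ffmpeg can
# usually infer the container format from the file itself, so other input
# formats could be handled without the explicit format="m4a" argument.
def convert_any_to_wav(audio_file, wav_file="temp.wav"):
    audio = AudioSegment.from_file(audio_file)  # format auto-detected by ffmpeg
    audio.export(wav_file, format="wav")
    return wav_file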
# @spaces.GPU(queue=True)
# Transcription function using the ASR pipeline
def transcribe_audio(audio_file):
    if audio_file.endswith(".m4a"):
        audio_file = convert_to_wav(audio_file)
    start_time = time.time()
    with torch.no_grad():
        output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
    transcription = output["text"]
    end_time = time.time()
    output_time = end_time - start_time
    word_count = len(transcription.split())
    result = f"Transcription: {transcription.strip()}\n\nTime taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
    return transcription.strip(), result
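# Example usage (hypothetical local file):
#   text, info = transcribe_audio("meeting.m4a")
#   print(info)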
# t5-base tokenizer and model for summarization (currently unused: the app
# summarizes with the TextRank function below)
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
# Move t5-base to the selected device
summarization_model.to(device)
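# A minimal abstractive alternative using the t5-base weights loaded above
# could look like this sketch (the prompt prefix follows T5's "summarize:"
# convention; generation parameters are illustrative assumptions, not tuned
# values):
def summarize_text_t5(text, max_input_tokens=512):
    inputs = summarization_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_tokens,
        truncation=True,
    ).to(device)
    with torch.no_grad():
        summary_ids = summarization_model.generate(
            **inputs, max_length=150, min_length=30, num_beams=4
        )
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)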
# Graph-based summarization (TextRank method)
def summarize_text(text):
    sentences = sent_tokenize(text)
    if len(sentences) == 0:
        return ""
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(tfidf_matrix)
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    # Select the top N sentences (e.g., 3) for the summary
    top_n = 3
    summary = " ".join([s for _, s in ranked_sentences[:top_n]])
    return summary
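# Note: the join above orders sentences by score, which can read disjointedly.
# A common refinement (sketch) is to restore original document order before
# joining:
#   top = sorted(ranked_sentences[:top_n], key=lambda t: sentences.index(t[1]))
#   summary = " ".join(s for _, s in top)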
# HTML for the banner image
image_html = """
<div style="text-align: center;">
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="85%" height="auto">
</div>
"""
# Save the transcription and/or summary to a PDF
def save_to_pdf(transcription, summary):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    if transcription:
        pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
    # Paragraph spacing
    pdf.ln(10)
    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + summary)
    pdf_output_path = "transcription_summary.pdf"
    pdf.output(pdf_output_path)
    return pdf_output_path
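# Note: FPDF's built-in fonts only cover latin-1; Norwegian æ/ø/å fit, but
# characters outside latin-1 would raise at pdf.output(). A defensive sketch
# (hypothetical helper, not wired in above) could sanitize text first:
def to_latin1(text):
    return text.encode("latin-1", "replace").decode("latin-1")
# The fixed output path also means concurrent users overwrite each other's
# PDFs; a unique name such as f"transcription_summary_{int(time.time())}.pdf"
# would avoid that.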
# Gradio Blocks UI
iface = gr.Blocks()
with iface:
    gr.HTML(image_html)
    gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
    with gr.Tabs():
        # Transcription tab
        with gr.TabItem("Transcription"):
            audio_input = gr.Audio(type="filepath")
            transcription_output = gr.Textbox(label="Transcription | nb-whisper-large-semantic")
            result_output = gr.Textbox(label="Time taken and Number of words")
            transcribe_button = gr.Button("Transcribe")
            transcribe_button.click(
                fn=transcribe_audio,
                inputs=[audio_input],
                outputs=[transcription_output, result_output]
            )
        # Summary tab
        with gr.TabItem("Summary"):
            summary_output = gr.Textbox(label="Summary | TextRank, graph-based")
            summarize_button = gr.Button("Summarize")
            def summarize(transcription):
                if not transcription:
                    return "Warning: a transcription must be available."
                return summarize_text(transcription)
            summarize_button.click(
                fn=summarize,
                inputs=[transcription_output],
                outputs=summary_output
            )
        # PDF download tab
        with gr.TabItem("Download PDF"):
            pdf_transcription_only = gr.Button("Download PDF with Transcription Only")
            pdf_summary_only = gr.Button("Download PDF with Summary Only")
            pdf_both = gr.Button("Download PDF with Both")
            pdf_output_transcription_only = gr.File(label="Download PDF")
            pdf_output_summary_only = gr.File(label="Download PDF")
            pdf_output_both = gr.File(label="Download PDF")
            def generate_pdf_transcription_only(transcription):
                return save_to_pdf(transcription, "")
            def generate_pdf_summary_only(summary):
                return save_to_pdf("", summary)
            def generate_pdf_both(transcription, summary):
                return save_to_pdf(transcription, summary)
            pdf_transcription_only.click(
                fn=generate_pdf_transcription_only,
                inputs=[transcription_output],
                outputs=[pdf_output_transcription_only]
            )
            pdf_summary_only.click(
                fn=generate_pdf_summary_only,
                inputs=[summary_output],
                outputs=[pdf_output_summary_only]
            )
            pdf_both.click(
                fn=generate_pdf_both,
                inputs=[transcription_output, summary_output],
                outputs=[pdf_output_both]
            )
# Launch the app
iface.launch(share=True, debug=True)