File size: 1,125 Bytes
7971622 5472a84 7971622 5472a84 7971622 5472a84 7971622 5472a84 7971622 5472a84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import functools
import tempfile

import gradio as gr
import librosa
from fpdf import FPDF
from transformers import pipeline
@functools.lru_cache(maxsize=1)
def _get_transcriber():
    """Build the Whisper speech-recognition pipeline once and reuse it.

    Constructing the pipeline loads the model weights, so doing it on every
    request is wasteful; the cache keeps a single instance for the life of
    the process while still deferring the load until the first request.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large",
        # Process long recordings in 30-second windows so that audio longer
        # than a single model window is transcribed in full.
        chunk_length_s=30,
    )


def _latin1_safe(text):
    """Return *text* with characters outside Latin-1 replaced by ``?``."""
    return text.encode("latin-1", errors="replace").decode("latin-1")


def transcribe_and_generate_pdf(audio_file):
    """Transcribe an audio file and write the transcription to a PDF.

    Args:
        audio_file: Path to the audio file to transcribe (the interface
            passes a file path). May be ``None`` or empty when nothing
            was uploaded.

    Returns:
        A ``(text, pdf_path)`` tuple. On success ``text`` is the full
        transcription and ``pdf_path`` is the path of a freshly created PDF.
        On failure ``text`` is a message starting with
        ``"An error occurred: "`` and ``pdf_path`` is ``None``.
    """
    # Reject a missing upload up front instead of loading the model only to
    # fail inside the audio loader.
    if not audio_file:
        return "An error occurred: no audio file was provided.", None

    try:
        # Load as a waveform resampled to 16 kHz before handing it to the model.
        audio, _ = librosa.load(audio_file, sr=16000)
        transcription = _get_transcriber()(audio)["text"]

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # The built-in Arial font only covers Latin-1; substitute anything
        # else so PDF generation cannot discard a valid transcription. The
        # returned text keeps the original characters.
        pdf.multi_cell(0, 10, _latin1_safe(transcription))

        # Give every request its own file so concurrent users do not
        # overwrite each other's download. Reserve the name, close the
        # handle, then let FPDF write to that path.
        with tempfile.NamedTemporaryFile(
            prefix="transcription_", suffix=".pdf", delete=False
        ) as handle:
            output_pdf = handle.name
        pdf.output(output_pdf)

        return transcription, output_pdf
    except Exception as e:
        # UI boundary: report the failure in the text output rather than
        # crashing the request.
        return f"An error occurred: {e}", None
# Gradio UI: one audio input wired to the transcription function, producing
# the transcription text and a downloadable PDF.
interface = gr.Interface(
    title="Audio-to-Text and PDF Generator",
    description="Upload an audio file to get its transcription and download the PDF.",
    fn=transcribe_and_generate_pdf,
    # type="filepath" hands the callback a path on disk; no 'source' argument
    # is passed.
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download PDF"),
    ],
)
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    interface.launch()