|
import gradio as gr |
|
from faster_whisper import WhisperModel |
|
|
|
|
|
# Load the Whisper large-v3 checkpoint once at import time so all requests share
# a single model instance. NOTE(review): large-v3 on CPU with the default
# compute type is very slow and memory-hungry; compute_type="int8" is the usual
# CPU setting — confirm the accuracy trade-off is acceptable before changing.
model = WhisperModel("large-v3", device="cpu")
|
|
|
|
|
def transcribe_audio(audio_file):
    """
    Transcribe an uploaded audio file with the module-level Faster Whisper model.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``),
        or ``None`` when the user submits without uploading anything.

    Returns
    -------
    str
        One ``[start -> end] text`` line per transcribed segment, or a
        human-readable error message. This function never raises, so the
        Gradio textbox always receives displayable text.
    """
    # Gradio passes None when no file was uploaded; fail fast with a clear
    # message instead of surfacing a cryptic exception string from the model.
    if audio_file is None:
        return "No audio provided. Please upload an audio file."
    try:
        # beam_size=5 enables beam search for higher-quality transcription.
        segments, _info = model.transcribe(audio_file, beam_size=5)
        # segments is a lazy generator; join drives the actual decoding.
        return "\n".join(
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
            for segment in segments
        )
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary, and the error
        # text is shown to the user rather than crashing the request.
        return f"Error: {str(e)}"
|
|
|
|
|
# Build the Gradio UI: one audio upload in, one textbox out.
interface = gr.Interface(
    fn=transcribe_audio,
    # Gradio 4.x renamed gr.Audio's ``source="upload"`` kwarg to
    # ``sources=["upload"]`` (a list); the old singular form raises
    # TypeError on modern Gradio versions.
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio-to-Text Transcription",
    description=(
        "Upload an audio file and get the transcription using the Faster Whisper model "
        "large-v3. Supports high-quality transcription with beam search."
    ),
    # NOTE(review): ``allow_flagging`` is deprecated in Gradio 5 in favour of
    # ``flagging_mode``; still accepted with a warning — confirm target version.
    allow_flagging="never",
)
|
|
|
|
|
if __name__ == "__main__":
    # server_name="0.0.0.0" binds every network interface, and share=True opens
    # a public Gradio tunnel — NOTE(review): both expose this app beyond
    # localhost; confirm that is intended before deploying.
    interface.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
|