File size: 1,340 Bytes
305c59b f8b81a1 1e0f1bc f8b81a1 d30da85 1e0f1bc d30da85 1e0f1bc d30da85 1e0f1bc d30da85 1e0f1bc d30da85 1e0f1bc d30da85 1e0f1bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import gradio as gr
from faster_whisper import WhisperModel
# Instantiate the Faster Whisper "large-v3" model once, at import time, so the
# transcription function below can reuse it on every call.
# Runs on CPU as configured; pass device="cuda" to use a GPU instead.
model = WhisperModel(
    "large-v3",
    device="cpu",
)
# Define the transcription function
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level Faster Whisper model.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty value (nothing was provided) is reported as an error
            without calling the model.

    Returns:
        One line per segment, formatted as ``[<start>s -> <end>s] <text>``
        and joined with newlines, or a string starting with ``"Error: "``
        when the input is missing or transcription fails.
    """
    # Guard first: without a file the model call would only fail with an
    # opaque library error message.
    if not audio_file:
        return "Error: no audio file provided."

    try:
        # beam_size=5 is a tunable default; adjust as needed.
        segments, _info = model.transcribe(audio_file, beam_size=5)
        # Keep the join inside the try block.
        # NOTE(review): faster-whisper documents `segments` as a lazy
        # generator, so decoding errors would surface while iterating here
        # rather than in the transcribe() call above -- confirm.
        return "\n".join(
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
            for segment in segments
        )
    except Exception as e:
        # Deliberately broad: this function reports any failure as text
        # in its return value instead of raising to the caller.
        return f"Error: {e}"
# Build the Gradio UI: one audio upload as input, one textbox as output,
# wired to transcribe_audio.
# NOTE(review): newer Gradio releases appear to spell `source=` on gr.Audio as
# `sources=[...]` and `allow_flagging=` as `flagging_mode=` -- confirm against
# the Gradio version this app is pinned to before changing either.
interface = gr.Interface(
    title="Audio-to-Text Transcription",
    description=(
        "Upload an audio file and get the transcription "
        "using the Faster Whisper model large-v3. "
        "Supports high-quality transcription with beam search."
    ),
    fn=transcribe_audio,  # Callback that processes the uploaded file
    inputs=gr.Audio(label="Upload Audio", source="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
    allow_flagging="never",
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # Serve on 0.0.0.0:7860; share=True is passed through to Gradio's launch().
    interface.launch(
        share=True,
        server_port=7860,
        server_name="0.0.0.0",
    )
|