# WhisperX-v2 / app.py
# Author: ras0k
# Commit e9e7628: "first real test" (898 bytes)
import gradio as gr
import whisperx
import whisper
# Whisper models already loaded in this process, keyed by device name, so the
# large checkpoint is loaded once instead of on every request.
_WHISPER_MODELS = {}


def _load_whisper_model(device):
    """Return the "large" Whisper model for *device*, loading it on first use."""
    if device not in _WHISPER_MODELS:
        _WHISPER_MODELS[device] = whisper.load_model("large", device)
    return _WHISPER_MODELS[device]


def transcribe(audio_file):
    """Transcribe an audio file with Whisper and align the output with WhisperX.

    Args:
        audio_file: Path to the audio file, or a file-like object that exposes
            its path as ``.name`` (what Gradio passes for ``type="file"``).

    Returns:
        A ``(segments, word_segments)`` tuple taken from the aligned result.

    Raises:
        ValueError: If no audio file was provided.
    """
    if audio_file is None:
        raise ValueError("No audio file was provided.")

    # Whisper and WhisperX take a path rather than an open file object, so
    # unwrap Gradio's temp-file wrapper; a plain path string passes through.
    audio_path = getattr(audio_file, "name", audio_file)

    # Imported locally so this block does not depend on a file-level import.
    import torch

    # Fall back to CPU instead of failing on hosts without a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Transcribe with original Whisper
    model = _load_whisper_model(device)
    result = model.transcribe(audio_path)

    # Load alignment model and metadata for the detected language
    model_a, metadata = whisperx.load_align_model(
        language_code=result["language"], device=device
    )

    # Align Whisper output
    result_aligned = whisperx.align(
        result["segments"], model_a, metadata, audio_path, device
    )
    return result_aligned["segments"], result_aligned["word_segments"]
# Define Gradio interface
inputs = gr.inputs.Audio(source="upload", type="file")
# transcribe() returns the aligned segments followed by the aligned word
# segments, so both outputs are post-alignment and are labelled as such.
outputs = [
    gr.outputs.Textbox(label="Segments (after alignment)"),
    gr.outputs.Textbox(label="Word segments (after alignment)"),
]
iface = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    title="WhisperX Transcription",
)
iface.launch()