File size: 898 Bytes
c464395
e9e7628
 
c464395
e9e7628
 
c464395
e9e7628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c464395
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr
import whisperx
import whisper

def transcribe(audio_file):
    """Transcribe an audio file with Whisper and align the output with WhisperX.

    Args:
        audio_file: Path to the audio file, or a file-like object that exposes
            its path through a ``name`` attribute (for example a temporary
            upload file).

    Returns:
        A tuple ``(segments, word_segments)`` taken from the WhisperX
        alignment result.

    Raises:
        ValueError: If ``audio_file`` is ``None`` (nothing was provided).
    """
    # Imported locally so this function does not rely on a module-level
    # import that the file does not have.
    import torch

    if audio_file is None:
        raise ValueError("No audio file provided")

    # Whisper and WhisperX are given a path below; unwrap file-like objects
    # (which carry the path in ``.name``) and pass plain paths through as-is.
    audio_path = getattr(audio_file, "name", audio_file)

    # Fall back to CPU instead of failing when no CUDA device is present.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Transcribe with original Whisper
    model = whisper.load_model("large", device)
    result = model.transcribe(audio_path)

    # Load alignment model and metadata for the detected language
    model_a, metadata = whisperx.load_align_model(
        language_code=result["language"], device=device
    )

    # Align Whisper output
    result_aligned = whisperx.align(
        result["segments"], model_a, metadata, audio_path, device
    )

    return result_aligned["segments"], result_aligned["word_segments"]

# Define Gradio interface
# "filepath" makes Gradio hand the callback a path string rather than a
# temporary file object; transcribe() passes its argument on as a path.
# NOTE(review): gr.inputs / gr.outputs are legacy namespaces — confirm the
# installed Gradio version before migrating to gr.Audio / gr.Textbox.
inputs = gr.inputs.Audio(source="upload", type="filepath")

# Labels follow the order of transcribe()'s return value: the aligned
# segments first, then the aligned word-level segments.
outputs = [
    gr.outputs.Textbox(label="Segments (after alignment)"),
    gr.outputs.Textbox(label="Word segments (after alignment)"),
]

iface = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    title="WhisperX Transcription",
)
iface.launch()