import spaces
import torch

import gradio as gr
from transformers import pipeline

import tempfile
import os

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000

device = 0 if torch.cuda.is_available() else "cpu"
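
# Whisper ASR pipeline. chunk_length_s=30 splits long recordings into 30-second
# chunks so audio longer than Whisper's context window can be transcribed.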
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
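

# FILE_LIMIT_MB is defined above but never enforced in this script. A minimal
# sketch of how an upload-size guard could look (hypothetical helper, not wired
# into transcribe()):
def check_file_size(filepath):
    size_mb = os.path.getsize(filepath) / (1024 * 1024)
    if size_mb > FILE_LIMIT_MB:
        raise gr.Error(f"File is {size_mb:.1f} MB; the limit is {FILE_LIMIT_MB} MB.")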


# @spaces.GPU requests a GPU slot when running on Hugging Face ZeroGPU Spaces.
@spaces.GPU
def transcribe(inputs, task):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text
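

# Hide Gradio's default footer via custom CSS.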
css = """
footer {
    visibility: hidden;
}
"""
mf_transcribe = gr.Interface(
    theme="Nymbo/Nymbo_Theme",
    css=css,
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Whisper Large V3 Turbo: Convert Speech to Text",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Whisper Large V3 Turbo: Convert Speech to Text",
    allow_flagging="never",
)

# Top-level Blocks container that hosts both interfaces as tabs, styled with the
# same theme and CSS.
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)

with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

demo.queue().launch(ssr_mode=False)