File size: 1,577 Bytes
5d52c32 6c226f9 d790c0b 88183ad 6c226f9 2362603 9d6fa91 66efbc3 6c226f9 5d52c32 3c0cd8e 6c226f9 3c0cd8e 6c226f9 ae3ef7d 6c226f9 3ce82e9 3c0cd8e ae3ef7d 3c0cd8e 3ce82e9 6c226f9 edce004 6c226f9 ae3ef7d 6c226f9 47407ef 7097513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import spaces
import torch
import gradio as gr
from transformers import pipeline
import tempfile
import os
# Checkpoint served by this app and the inference settings used with it.
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8  # forwarded as ``batch_size`` on every pipeline call
FILE_LIMIT_MB = 1000  # not referenced by the visible code in this file

# Use CUDA device index 0 when a GPU is visible, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time and reused by
# transcribe() for every request.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
@spaces.GPU
def transcribe(inputs, task):
    """Run the shared speech-recognition pipeline on one audio input.

    Args:
        inputs: Audio to process. The UI components in this file are
            configured with ``type="filepath"``; ``None`` means nothing was
            submitted.
        task: Forwarded to generation as ``generate_kwargs["task"]``. The UI
            offers ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    # Reject empty submissions up front so the user gets a readable message.
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
# Custom stylesheet passed to the UI: hides the page's <footer> element.
css = (
    "\n"
    "footer {\n"
    "visibility: hidden;\n"
    "}\n"
)
# Microphone tab: audio recorded in the browser is handed to transcribe() as a
# file path, together with the task selected in the radio group.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    # The Korean part of the title reads "convert speech to text". It must stay
    # on a single line as valid UTF-8; a mis-decoded copy splits the literal
    # and breaks parsing.
    title="Whisper Large V3 Turbo: 음성을 텍스트로 변환",
    theme="Nymbo/Nymbo_Theme",
    css=css,
    allow_flagging="never",
)
# Upload tab: an uploaded audio file is handed to transcribe() as a file path,
# together with the task selected in the radio group.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    # The Korean part of the title reads "convert speech to text". It must stay
    # on a single line as valid UTF-8; a mis-decoded copy splits the literal
    # and breaks parsing.
    title="Whisper Large V3 Turbo: 음성을 텍스트로 변환",
    allow_flagging="never",
)
# Build the top-level app. ``demo`` has to be created here: nothing else in
# this file defines it, so ``with demo:`` alone raises NameError.
with gr.Blocks() as demo:
    # One tab per input mode, in the same order as the tab labels.
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

# Enable request queuing, then start the server with ssr_mode turned off.
demo.queue().launch(ssr_mode=False)
|