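"""Gradio demo: speech-to-text with openai/whisper-large-v3-turbo.

Two tabs (microphone recording and file upload) share a single `transcribe`
function backed by a 🤗 Transformers automatic-speech-recognition pipeline.
"""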
import spaces
import torch
import gradio as gr
from transformers import pipeline

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8  # number of audio chunks decoded in parallel
FILE_LIMIT_MB = 1000  # intended upload size cap; not enforced in this demo

# Use the first CUDA device when available, otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,  # split long-form audio into 30-second windows
    device=device,
)
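
# A minimal usage sketch (assumes a local file "sample.wav" exists): Whisper
# needs return_timestamps=True for audio longer than 30 seconds, and
# generate_kwargs={"task": "translate"} switches the output to English.
#
#   result = pipe("sample.wav", batch_size=BATCH_SIZE,
#                 generate_kwargs={"task": "transcribe"},
#                 return_timestamps=True)
#   print(result["text"])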

# @spaces.GPU requests a ZeroGPU slot on Hugging Face Spaces for the duration
# of the call; outside a ZeroGPU Space the decorator is a no-op.
@spaces.GPU
def transcribe(inputs, task):
    """Transcribe (or translate to English) the audio file at `inputs`."""
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text
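
# A sketch of how FILE_LIMIT_MB could be enforced inside transcribe(), assuming
# `inputs` is a filepath (as both Audio components below are configured); it
# would also require `import os`:
#
#   if os.path.getsize(inputs) > FILE_LIMIT_MB * 1024 * 1024:
#       raise gr.Error(f"File exceeds the {FILE_LIMIT_MB} MB limit.")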

# Hide the Gradio footer. Custom CSS and the theme must be set on the top-level
# Blocks that is actually launched; they are ignored on nested interfaces.
css = """
footer {
    visibility: hidden;
}
"""

demo = gr.Blocks(theme=gr.themes.Ocean(), css=css)

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Whisper Large V3 Turbo: Speech to Text",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Whisper Large V3 Turbo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

# ssr_mode=False turns off Gradio's server-side rendering when launching.
demo.queue().launch(ssr_mode=False)