badassgi

Running

File size: 1,856 Bytes

5d52c32
6c226f9
 
 
 
d790c0b
 
88183ad
6c226f9
2362603
9d6fa91
66efbc3
6c226f9
 
 
 
 
 
 
 
 
 
 
5d52c32
3c0cd8e
 
b74c419
6c226f9
3c0cd8e
b74c419
6c226f9
 
ae3ef7d
 
 
 
 
 
b74c419
6c226f9
 
3ce82e9
b74c419
3c0cd8e
 
ae3ef7d
b74c419
3c0cd8e
 
 
 
 
b74c419
 
6c226f9
 
edce004
b74c419
6c226f9
 
b74c419
 
0520598
6c226f9
b74c419
6c226f9
47407ef

import spaces
import torch

import gradio as gr
from transformers import pipeline

import tempfile
import os

# Hugging Face model id of the Whisper checkpoint loaded by this app.
MODEL_NAME = "openai/whisper-large-v3-turbo"
# Batch size forwarded to the pipeline on every transcription call.
BATCH_SIZE = 8
# Size cap in megabytes; not referenced by the code visible in this file.
FILE_LIMIT_MB = 1000

# Use CUDA device index 0 when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Build the speech-recognition pipeline once at import time so that every
# request reuses the same loaded model.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)


@spaces.GPU
def transcribe(inputs, task):
    """Run the module-level ASR pipeline on *inputs* and return its text.

    Args:
        inputs: Audio handed over by the Gradio audio component, or ``None``
            when the user submitted nothing.
        task: Value forwarded to generation as ``generate_kwargs["task"]``
            (the UI offers ``"transcribe"`` and ``"translate"``).

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If *inputs* is ``None``.
    """
    if inputs is None:
        raise gr.Error("오디오 파일이 제출되지 않았습니다! 요청을 제출하기 전에 오디오 파일을 업로드하거나 녹음해 주세요.")

    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]


# Custom stylesheet that hides the Gradio footer.
css = """
footer {
    visibility: hidden;
}
"""


def _build_interface(audio_input):
    """Return a transcription ``gr.Interface`` fed by *audio_input*.

    Both tabs share the same handler, task selector, output type, title and
    flagging setting; only the audio component differs, so it is the sole
    parameter.
    """
    return gr.Interface(
        fn=transcribe,
        inputs=[
            audio_input,
            gr.Radio(["transcribe", "translate"], label="작업", value="transcribe"),
        ],
        outputs="text",
        title="Whisper Large V3 Turbo: 음성을 텍스트로 변환",
        flagging_mode="never",  # replaces the deprecated allow_flagging option
    )


# One interface per audio source: live microphone recording and file upload.
mf_transcribe = _build_interface(gr.Audio(sources="microphone", type="filepath"))
file_transcribe = _build_interface(
    gr.Audio(sources="upload", type="filepath", label="오디오 파일")
)

# Root container of the app. The stylesheet is attached here rather than to
# an inner Interface: the inner interfaces are rendered into this Blocks, and
# custom CSS is expected to take effect only when set on the root.
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)

with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["마이크", "오디오 파일"])

demo.queue().launch(ssr_mode=False)