File size: 1,881 Bytes
5d52c32
6c226f9
 
 
 
d790c0b
 
88183ad
6c226f9
2362603
9d6fa91
66efbc3
6c226f9
 
 
 
 
 
 
 
 
 
 
5d52c32
3c0cd8e
 
 
6c226f9
3c0cd8e
 
6c226f9
 
47407ef
6c226f9
ae3ef7d
 
 
 
 
 
 
6c226f9
 
3ce82e9
 
3c0cd8e
 
ae3ef7d
 
3c0cd8e
 
 
 
 
 
3ce82e9
 
6c226f9
 
a5bfe25
6c226f9
b95b5ca
6c226f9
 
 
 
 
 
 
ae3ef7d
6c226f9
47407ef
7097513
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import spaces
import torch

import gradio as gr
from transformers import pipeline

import tempfile
import os

# Hugging Face Hub checkpoint used for speech recognition.
MODEL_NAME = "openai/whisper-large-v3-turbo"
# Number of audio chunks processed per forward pass by the pipeline.
BATCH_SIZE = 8
# Upload size limit in MB.  NOTE(review): declared but not enforced anywhere
# in this file — presumably honored by the hosting UI; confirm.
FILE_LIMIT_MB = 1000

# transformers accepts a CUDA device index (int) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Long-form ASR pipeline: input audio is split into 30-second chunks so
# recordings of arbitrary length can be transcribed.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


@spaces.GPU
def transcribe(inputs, task):
    """Run Whisper speech recognition on an uploaded or recorded audio file.

    Args:
        inputs: Filepath of the audio to process, or ``None`` when the user
            submitted nothing.
        task: Either ``"transcribe"`` or ``"translate"``; forwarded to the
            model's generation kwargs.

    Returns:
        The recognized text as a single string.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # return_timestamps=True is what allows the pipeline to stitch together
    # audio longer than the 30 s chunk window; only the joined text is
    # surfaced to the UI.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]


# CSS that hides the Gradio footer; applied at the Blocks level so it takes
# effect for every tab rendered inside `demo`.
css = """
footer {
    visibility: hidden;
}
"""

# Top-level container.  When Interfaces are mounted inside a Blocks (see the
# `with demo:` section below), the Blocks' theme and css govern rendering, so
# theme/css previously passed to `mf_transcribe` alone were conflicting dead
# configuration — they are set once here instead.
demo = gr.Blocks(theme=gr.themes.Ocean(), css=css)

# Microphone tab: record audio and transcribe or translate it.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Whisper Large V3 Turbo: 음성을 텍스트로 변환",
    allow_flagging="never",
)

# Markdown description shown on the file-upload tab; links the model card.
_FILE_TAB_DESCRIPTION = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length."
)

# File-upload tab: transcribe or translate an uploaded audio file.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Whisper Large V3: Transcribe Audio",
    description=_FILE_TAB_DESCRIPTION,
    allow_flagging="never",
)

# Mount both interfaces as tabs inside the themed Blocks container.
with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

# queue() enables request queuing before launching the app.
# NOTE(review): ssr_mode=False appears intended to disable server-side
# rendering — confirm against the Gradio launch() documentation.
demo.queue().launch(ssr_mode=False)