import spaces
import torch

import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
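# BATCH_SIZE batches the Whisper chunks per forward pass; FILE_LIMIT_MB is declared but not enforced below.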

device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,  # process long audio in 30-second chunks
    device=device,
)

# Hugging Face InferenceClient used for summarization and blog post generation
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
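# Both summarization() and text_generation() below go through the hosted Inference API,
# so a valid HF_TOKEN (e.g. a Space secret) should be available at runtime.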

@spaces.GPU
def transcribe_summarize_and_blog(inputs, task):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # Transcribe (or translate) the speech to text with Whisper
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
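    # return_timestamps=True also returns segment timestamps; only the plain "text" field is kept here.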
    
    # Summarize the transcribed text
    try:
        summary = hf_client.summarization(text)
        summary_text = summary["summary_text"] if summary and "summary_text" in summary else "Unable to summarize."
    except Exception as e:
        summary_text = f"An error occurred during summarization: {e}"
    
    # Generate a blog post from the transcript
    try:
        blog_post = hf_client.text_generation(
            prompt=f"Please write a blog post based on the following content:\n{text}",
            max_new_tokens=1024,  # leave room for a full-length post
            temperature=0.7
        )
        blog_post_text = blog_post if isinstance(blog_post, str) else "Unable to generate a blog post."
    except Exception as e:
        blog_post_text = f"An error occurred while generating the blog post: {e}"
    
    # Return three values, in the order expected by the interfaces' three text outputs
    return text, summary_text, blog_post_text



 
# CSS that hides Gradio's default footer; it is applied via the top-level Blocks container below
css = """
footer {
    visibility: hidden;
}
"""

file_transcribe = gr.Interface(
    fn=transcribe_summarize_and_blog,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="μ˜€λ””μ˜€ 파일"),
        gr.Radio(["transcribe", "translate"], label="μž‘μ—…", value="transcribe"),
    ],
    outputs=["text", "text", "text"],  # λ³€ν™˜λœ ν…μŠ€νŠΈ, μš”μ•½, λΈ”λ‘œκ·Έ κΈ€ 좜λ ₯
    title="λ°›μ•„μ“°κΈ° AI: μŒμ„±μ„ ν…μŠ€νŠΈ λ³€ν™˜, μš”μ•½ 및 λΈ”λ‘œκ·Έ ν¬μŠ€νŒ… μžλ™ 생성",
    flagging_mode="never",
)

mf_transcribe = gr.Interface(
    fn=transcribe_summarize_and_blog,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="μž‘μ—…", value="transcribe"),
    ],
    outputs=["text", "text", "text"],  # λ³€ν™˜λœ ν…μŠ€νŠΈ, μš”μ•½, λΈ”λ‘œκ·Έ κΈ€ 좜λ ₯
    title="λ°›μ•„μ“°κΈ° AI: μŒμ„±μ„ ν…μŠ€νŠΈ λ³€ν™˜, μš”μ•½ 및 λΈ”λ‘œκ·Έ ν¬μŠ€νŒ… μžλ™ 생성",
    flagging_mode="never",
)

# Top-level Gradio Blocks container; the css hides the footer across the whole app
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)

# Tab order: "Audio File" first, then "Microphone"
with demo:
    gr.TabbedInterface([file_transcribe, mf_transcribe], ["Audio File", "Microphone"])

demo.queue().launch(ssr_mode=False)
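
# Quick local check (illustrative only; "sample.wav" is a hypothetical file and launch() above blocks):
#   transcript, summary, blog = transcribe_summarize_and_blog("sample.wav", "transcribe")
#   print(summary)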