File size: 3,438 Bytes
6748e07
 
78ea8dc
bfbdf81
855f2cd
 
 
 
6748e07
855f2cd
78ea8dc
6748e07
 
 
87e6f23
855f2cd
 
 
6748e07
 
 
855f2cd
 
 
2a55caa
855f2cd
 
 
6748e07
 
855f2cd
 
6748e07
855f2cd
6748e07
 
20017db
855f2cd
6748e07
855f2cd
6748e07
4b414b1
6748e07
 
4b414b1
855f2cd
 
 
4b414b1
 
 
8e74b09
 
 
 
855f2cd
 
 
 
2b108d4
8e74b09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os, io, base64, tempfile, requests
import gradio as gr
from PIL import Image

# ───────────────────────────────────────────────
# 1. HF Inference API setup
# ───────────────────────────────────────────────
# Access token for the Hugging Face Inference API, read from the environment
# (on Spaces it is configured under Settings -> Secrets).
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    # Abort at import time when the token is missing or empty.
    # NOTE(review): the message text below looks mis-encoded (mojibake) --
    # confirm the file is stored as UTF-8.
    raise RuntimeError(
        "HF_TOKEN 비밀값이 μ—†μŠ΅λ‹ˆλ‹€. Settings β†’ Secrets에 λ“±λ‘ν•˜μ„Έμš”."
    )

# Bearer-token header attached to every Inference API request.
HEADERS = {"Authorization": "Bearer " + HF_TOKEN}

# Hosted model endpoints: image captioning (BLIP) and music generation (MusicGen).
CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"

# ───────────────────────────────────────────────
# 2. Caption generation function
# ───────────────────────────────────────────────
def generate_caption(image_pil: Image.Image) -> str:
    """Return a caption for *image_pil* from the remote captioning endpoint.

    The image is encoded as PNG in memory and POSTed as the raw request body
    to ``CAPTION_API`` with the module-level ``HEADERS``.

    Args:
        image_pil: Image to describe.

    Returns:
        The ``generated_text`` field of the first item in the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    png_buffer = io.BytesIO()
    image_pil.save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()

    response = requests.post(
        CAPTION_API,
        headers=HEADERS,
        data=png_bytes,
        timeout=60,
    )
    response.raise_for_status()

    # The response is indexed as a list whose first element holds the caption.
    first_result = response.json()[0]
    return first_result["generated_text"]

# ───────────────────────────────────────────────
# 3. Music generation function
# ───────────────────────────────────────────────
def generate_music(prompt: str, duration: int = 10) -> str:
    """Generate audio for *prompt* via the remote endpoint and save it to disk.

    Args:
        prompt: Text description sent as the ``inputs`` field of the request.
        duration: Value sent as ``parameters.duration``
            (presumably seconds -- TODO confirm the endpoint honors it).

    Returns:
        Path of a temporary ``.wav``-suffixed file holding the response body.
        The file is not deleted automatically; the caller owns its cleanup.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    payload = {"inputs": prompt, "parameters": {"duration": duration}}
    resp = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
    resp.raise_for_status()

    # delete=False keeps the file on disk after closing so the returned path
    # stays usable; the context manager guarantees the handle is closed even
    # if the write fails.
    # NOTE(review): the response bytes are written verbatim under a ".wav"
    # suffix; the actual audio format returned by the endpoint is not checked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(resp.content)
    return tmp.name

# ───────────────────────────────────────────────
# 4. Full pipeline
# ───────────────────────────────────────────────
def process(image):
    """Caption an image, then generate music inspired by that caption.

    Args:
        image: PIL image supplied by the Gradio input component, or ``None``
            when the form is submitted without an image.

    Returns:
        A ``(caption, audio_path)`` tuple: the generated caption text and the
        path of the generated audio file.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Show a readable message in the UI instead of failing later with an
        # AttributeError on None. Message: "Please upload an image first."
        raise gr.Error("이미지를 먼저 업로드하세요.")

    caption = generate_caption(image)
    audio = generate_music(f"A cheerful melody inspired by: {caption}")
    return caption, audio

# ───────────────────────────────────────────────
# 5. Gradio UI
# ───────────────────────────────────────────────
# Single-function interface: one PIL image in, caption text and audio out.
# NOTE(review): the label/title/description strings below look mis-encoded
# (mojibake) -- confirm the file is stored as UTF-8.
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Text(label="AIκ°€ μƒμ„±ν•œ κ·Έλ¦Ό μ„€λͺ…"),
        gr.Audio(label="μƒμ„±λœ AI μŒμ•… (MusicGen)"),
    ],
    title="🎨 AI κ·Έλ¦Ό-μŒμ•… 생성기 (Inference API)",
    description=(
        "이미지λ₯Ό μ—…λ‘œλ“œν•˜λ©΄ BLIP-baseκ°€ μ„€λͺ…을 λ§Œλ“€κ³ , "
        "ν•΄λ‹Ή μ„€λͺ…μœΌλ‘œ MusicGen-small이 10초 μŒμ•…μ„ μƒμ„±ν•©λ‹ˆλ‹€."
    ),
)
# Enable the request queue; queue() returns the same interface object, so a
# separate call is equivalent to chaining it onto the constructor.
demo.queue()

if __name__ == "__main__":
    # Start the web server only when executed as a script.
    demo.launch()