File size: 4,369 Bytes
6748e07
 
78ea8dc
bfbdf81
2a55caa
6748e07
2a55caa
6748e07
 
 
78ea8dc
6748e07
 
 
87e6f23
2a55caa
6748e07
2a55caa
6748e07
 
 
 
2a55caa
6748e07
 
 
 
 
 
8e74b09
2a55caa
6748e07
2a55caa
6748e07
 
 
 
07cf72c
6748e07
 
 
 
 
20017db
2a55caa
6748e07
2a55caa
6748e07
4b414b1
6748e07
 
4b414b1
2a55caa
6748e07
2a55caa
2b108d4
4b414b1
 
 
8e74b09
 
 
 
6748e07
2b108d4
 
 
 
 
4b414b1
 
2b108d4
8e74b09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os, io, base64, tempfile, requests
import gradio as gr
from PIL import Image

# ─────────────────────────────────────────────────────────────
# 1. Environment variables & HF Inference API configuration
# ─────────────────────────────────────────────────────────────
HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face API token, set via Spaces Settings β†’ Secrets
if not HF_TOKEN:
    # Fail fast at import time: every API call below requires this bearer token.
    raise RuntimeError("HF_TOKEN λΉ„λ°€ 값이 μ„€μ •λ˜μ–΄ μžˆμ§€ μ•ŠμŠ΅λ‹ˆλ‹€. Spaces Settings β†’ Secretsμ—μ„œ 등둝해 μ£Όμ„Έμš”.")

# Shared Authorization header for both Inference API endpoints.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
# Hosted Inference API model endpoints:
# BLIP-base for image captioning, MusicGen-small for text-to-music.
CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
MUSIC_API   = "https://api-inference.huggingface.co/models/facebook/musicgen-small"

# ─────────────────────────────────────────────────────────────
# 2. 이미지 μΊ‘μ…˜ 생성 (BLIP-base via API)
# ─────────────────────────────────────────────────────────────
def generate_caption(image_pil: Image.Image) -> str:
    """Generate an English caption for *image_pil* via the BLIP-base Inference API.

    Parameters
    ----------
    image_pil : PIL.Image.Image
        The image to describe.

    Returns
    -------
    str
        The caption text produced by the model.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status.
    RuntimeError
        If the API responds 200 with a JSON error payload
        (typically while the model is still cold-loading).
    """
    buf = io.BytesIO()
    image_pil.save(buf, format="PNG")

    # Binary upload: the captioning endpoint accepts raw image bytes.
    # (getvalue() returns the whole buffer regardless of stream position,
    # so no seek(0) is needed.)
    response = requests.post(CAPTION_API, headers=HEADERS, data=buf.getvalue(), timeout=60)
    response.raise_for_status()
    result = response.json()

    # The Inference API can return HTTP 200 with {"error": ..., "estimated_time": ...}
    # while the model loads; surface that clearly instead of crashing on result[0].
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(f"Caption API error: {result['error']}")

    # Success payload shape: [{"generated_text": "..."}]
    return result[0]["generated_text"]

# ─────────────────────────────────────────────────────────────
# 3. MusicGen-small μŒμ•… 생성 (10초, via API)
# ─────────────────────────────────────────────────────────────
def generate_music(prompt: str, duration: int = 10) -> str:
    """Generate a short music clip from *prompt* with MusicGen-small via the Inference API.

    Parameters
    ----------
    prompt : str
        Text description of the desired music.
    duration : int, optional
        Clip length in seconds (default 10).

    Returns
    -------
    str
        Path to a temporary ``.wav`` file holding the generated audio.
        The file is created with ``delete=False``; the caller (Gradio)
        reads it by path.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status.
    RuntimeError
        If the API responds 200 with a JSON error payload instead of
        WAV bytes (e.g. while the model is cold-loading).
    """
    payload = {"inputs": prompt, "parameters": {"duration": duration}}
    response = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
    response.raise_for_status()

    # A 200 response can still carry a JSON error body rather than audio;
    # writing that into a .wav file would yield a silently broken player.
    content_type = response.headers.get("content-type", "")
    if content_type.startswith("application/json"):
        raise RuntimeError(f"Music API error: {response.text}")

    # Persist the WAV binary to a temp file; the context manager guarantees
    # the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(response.content)
    return tmp.name

# ─────────────────────────────────────────────────────────────
# 4. 전체 νŒŒμ΄ν”„λΌμΈ
# ─────────────────────────────────────────────────────────────
def process(image):
    """Run the full pipeline for one uploaded image.

    First asks BLIP for a caption of *image*, then feeds that caption
    into MusicGen as a music prompt.

    Returns a ``(caption, wav_path)`` tuple matching the two Gradio outputs.
    """
    description = generate_caption(image)
    music_prompt = f"A cheerful melody inspired by: {description}"
    wav_path = generate_music(music_prompt)
    return description, wav_path

# ─────────────────────────────────────────────────────────────
# 5. Gradio μΈν„°νŽ˜μ΄μŠ€
# ─────────────────────────────────────────────────────────────
# Gradio UI: one image input; two outputs (caption text + generated audio).
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Text(label="AIκ°€ μƒμ„±ν•œ κ·Έλ¦Ό μ„€λͺ…"),
        gr.Audio(label="μƒμ„±λœ AI μŒμ•… (MusicGen)")
    ],
    title="🎨 AI κ·Έλ¦Ό-μŒμ•… 생성기 (Inference API 버전)",
    description="이미지λ₯Ό μ—…λ‘œλ“œν•˜λ©΄ BLIP-baseκ°€ μ„€λͺ…을 μƒμ„±ν•˜κ³ , "
                "ν•΄λ‹Ή μ„€λͺ…μœΌλ‘œ MusicGen-small이 10초 μŒμ•…μ„ λ§Œλ“­λ‹ˆλ‹€.",
)


if __name__ == "__main__":
    # Queueing is configured on the Blocks object, not in the Interface
    # constructor: `queue=True` is not an Interface.__init__ parameter in
    # Gradio 4.x and raises TypeError there (same migration that removed
    # `concurrency_count`).
    demo.queue()
    demo.launch()