# NOTE: scraped page header (not program code) - Hugging Face Spaces status: "Runtime error".
import base64
import io
import os
import tempfile

import gradio as gr
import requests
from PIL import Image
# ───────────────────────────────────────────────
# 1. Hugging Face Inference API setup
# ───────────────────────────────────────────────
# API token read from the environment (Spaces → Settings → Secrets).
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast at import time: every request below sends this token.
    raise RuntimeError("HF_TOKEN 비밀값이 없습니다. Settings → Secrets에 등록하세요.")

# Authorization header attached to every Inference API request.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Hosted-inference endpoints for the captioning and music-generation models.
CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
# ───────────────────────────────────────────────
# 2. Caption generation function
# ───────────────────────────────────────────────
def generate_caption(image_pil: Image.Image) -> str:
    """Return a caption for *image_pil* from the hosted captioning model.

    The image is serialized to PNG in memory and posted as the raw request
    body to ``CAPTION_API``.

    Args:
        image_pil: Image to describe.

    Returns:
        The ``generated_text`` field of the first item in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        RuntimeError: If the JSON body does not have the expected
            ``[{"generated_text": ...}]`` structure.
    """
    buf = io.BytesIO()
    image_pil.save(buf, format="PNG")
    resp = requests.post(CAPTION_API, headers=HEADERS, data=buf.getvalue(), timeout=60)
    resp.raise_for_status()
    result = resp.json()
    try:
        return result[0]["generated_text"]
    except (IndexError, KeyError, TypeError) as err:
        # Report the actual payload rather than an opaque lookup error.
        raise RuntimeError(f"Unexpected caption response: {result!r}") from err
# ───────────────────────────────────────────────
# 3. Music generation function
# ───────────────────────────────────────────────
def generate_music(prompt: str, duration: int = 10) -> str:
    """Request audio for *prompt* from the hosted music model and save it.

    Args:
        prompt: Text sent as the model input.
        duration: Value forwarded as the ``duration`` request parameter.

    Returns:
        Path of a temporary file (``.wav`` suffix) containing the raw
        response bytes. The file is not removed automatically.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    payload = {"inputs": prompt, "parameters": {"duration": duration}}
    resp = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
    resp.raise_for_status()
    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays usable; the context manager closes the handle even
    # if the write raises.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(resp.content)
    return tmp.name
# ───────────────────────────────────────────────
# 4. Full pipeline
# ───────────────────────────────────────────────
def process(image):
    """Caption the image, then generate music from that caption.

    Args:
        image: PIL image from the Gradio image input, or ``None`` when the
            form is submitted without an upload.

    Returns:
        A ``(caption, audio_path)`` tuple, one element per interface output.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Show a readable message in the UI instead of failing inside PIL
        # with an AttributeError on None.
        raise gr.Error("이미지를 먼저 업로드해 주세요.")
    caption = generate_caption(image)
    audio = generate_music(f"A cheerful melody inspired by: {caption}")
    return caption, audio
# ───────────────────────────────────────────────
# 5. Gradio UI
# ───────────────────────────────────────────────
# Single-function interface: one image in, caption text and generated audio out.
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Text(label="AI가 생성한 그림 설명"),
        gr.Audio(label="생성된 AI 음악 (MusicGen)"),
    ],
    title="🎨 AI 그림-음악 생성기 (Inference API)",
    description=(
        "이미지를 업로드하면 BLIP-base가 설명을 만들고, "
        "해당 설명으로 MusicGen-small이 10초 음악을 생성합니다."
    ),
).queue()  # chaining .queue() here enables the request queue when needed
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script.
    demo.launch()