Spaces:
Runtime error
Runtime error
File size: 3,438 Bytes
6748e07 78ea8dc bfbdf81 855f2cd 6748e07 855f2cd 78ea8dc 6748e07 87e6f23 855f2cd 6748e07 855f2cd 2a55caa 855f2cd 6748e07 855f2cd 6748e07 855f2cd 6748e07 20017db 855f2cd 6748e07 855f2cd 6748e07 4b414b1 6748e07 4b414b1 855f2cd 4b414b1 8e74b09 855f2cd 2b108d4 8e74b09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import os, io, base64, tempfile, requests
import gradio as gr
from PIL import Image
# ───────────────────────────────────────────────
# 1. HF Inference API setup
# ───────────────────────────────────────────────
# Hugging Face access token, read from the environment
# (on Spaces: Settings → Secrets).
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast at import time when the token is missing or empty.
    raise RuntimeError("HF_TOKEN 비밀값이 없습니다. Settings → Secrets에 등록하세요.")

# Authorization header attached to the Inference API requests.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Hosted Inference API endpoints for the captioning and music models.
CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
# ───────────────────────────────────────────────
# 2. Caption generation function
# ───────────────────────────────────────────────
def generate_caption(image_pil: Image.Image) -> str:
    """Return a text caption for *image_pil* from the captioning endpoint.

    The image is serialized to PNG in memory and posted as the raw request
    body to ``CAPTION_API`` with a 60-second timeout.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    png_buffer = io.BytesIO()
    image_pil.save(png_buffer, format="PNG")

    response = requests.post(
        CAPTION_API,
        headers=HEADERS,
        data=png_buffer.getvalue(),
        timeout=60,
    )
    response.raise_for_status()

    # The code reads the caption from the first element of the JSON reply.
    predictions = response.json()
    return predictions[0]["generated_text"]
# ───────────────────────────────────────────────
# 3. Music generation function
# ───────────────────────────────────────────────
def generate_music(prompt: str, duration: int = 10) -> str:
    """Generate audio for *prompt* and return the path of the saved file.

    Posts ``prompt`` (with ``duration`` forwarded under ``parameters``) as
    JSON to ``MUSIC_API`` with a 120-second timeout, then writes the raw
    response body to a new temporary file with a ``.wav`` suffix.

    The temporary file is created with ``delete=False``, so it is not removed
    automatically; the returned path stays valid after this function returns.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    # NOTE(review): whether the endpoint honours `duration` and returns WAV
    # data cannot be confirmed from this file — verify against the API.
    payload = {"inputs": prompt, "parameters": {"duration": duration}}
    resp = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
    resp.raise_for_status()

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(resp.content)
    return tmp.name
# ───────────────────────────────────────────────
# 4. Full pipeline
# ───────────────────────────────────────────────
def process(image):
    """Caption *image*, then generate music from that caption.

    Returns a ``(caption, audio_path)`` tuple: the caption text and the
    value returned by ``generate_music`` for the derived prompt.
    """
    caption = generate_caption(image)
    music_prompt = f"A cheerful melody inspired by: {caption}"
    audio_path = generate_music(music_prompt)
    return caption, audio_path
# ───────────────────────────────────────────────
# 5. Gradio UI
# ───────────────────────────────────────────────
# Gradio interface: one PIL image in, a caption text box and an audio
# player out, both produced by `process`.
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Text(label="AI가 생성한 그림 설명"),
        gr.Audio(label="생성된 AI 음악 (MusicGen)"),
    ],
    title="🎨 AI 그림-음악 생성기 (Inference API)",
    description="이미지를 업로드하면 BLIP-base가 설명을 만들고, "
                "해당 설명으로 MusicGen-small이 10초 음악을 생성합니다.",
).queue()  # Chaining .queue() here enables request queuing when needed.

if __name__ == "__main__":
    demo.launch()
|