Spaces:

yongyeol
/

imagetoaudio

Runtime error

App Files Files Community

imagetoaudio / app.py

yongyeol

Update app.py

855f2cd verified about 1 month ago

raw

history blame

3.44 kB

	import os, io, base64, tempfile, requests
	import gradio as gr
	from PIL import Image

	# ───────────────────────────────────────────────
	# 1. HF Inference API 준비
	# ───────────────────────────────────────────────
	# The token is expected to be provided as a secret (Spaces → Settings → Secrets).
	HF_TOKEN = os.environ.get("HF_TOKEN")
	if HF_TOKEN is None or HF_TOKEN == "":
	    # Fail fast at import time: every API call below needs the token.
	    raise RuntimeError("HF_TOKEN 비밀값이 없습니다. Settings → Secrets에 등록하세요.")

	# Inference endpoints: one captioning model and one music-generation model.
	CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
	MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
	# Shared by every request; carries the bearer token.
	HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

	# ───────────────────────────────────────────────
	# 2. 캡션 생성 함수
	# ───────────────────────────────────────────────
	def generate_caption(image_pil: Image.Image) -> str:
	    """Return the caption generated for *image_pil* by the captioning endpoint.

	    The image is encoded as PNG in memory and sent as the raw request body
	    to ``CAPTION_API`` with a 60-second timeout.

	    Raises:
	        requests.HTTPError: If the endpoint responds with an error status.
	    """
	    with io.BytesIO() as png_buffer:
	        image_pil.save(png_buffer, format="PNG")
	        png_bytes = png_buffer.getvalue()

	    response = requests.post(
	        CAPTION_API,
	        headers=HEADERS,
	        data=png_bytes,
	        timeout=60,
	    )
	    response.raise_for_status()

	    # The caption is read from the first element of the JSON response.
	    first_result = response.json()[0]
	    return first_result["generated_text"]

	# ───────────────────────────────────────────────
	# 3. 음악 생성 함수
	# ───────────────────────────────────────────────
	def generate_music(prompt: str, duration: int = 10) -> str:
	    """Generate audio for *prompt* and return the path of the saved file.

	    Posts ``prompt`` and ``duration`` as JSON to ``MUSIC_API`` (120-second
	    timeout) and writes the raw response body to a new temporary file.

	    Args:
	        prompt: Text description sent as the model input.
	        duration: Value sent as the ``duration`` request parameter.

	    Returns:
	        Filesystem path of the temporary file holding the response bytes.
	        The file is created with ``delete=False`` and is not removed here.

	    Raises:
	        requests.HTTPError: If the endpoint responds with an error status.
	    """
	    payload = {"inputs": prompt, "parameters": {"duration": duration}}
	    resp = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
	    resp.raise_for_status()

	    # NOTE(review): the bytes are stored verbatim under a ".wav" suffix;
	    # confirm the endpoint actually returns WAV-encoded audio.
	    # The context manager closes the handle even if the write fails, while
	    # delete=False keeps the file on disk so it can be opened later by path.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	        tmp.write(resp.content)
	    return tmp.name

	# ───────────────────────────────────────────────
	# 4. 전체 파이프라인
	# ───────────────────────────────────────────────
	def process(image):
	    """Caption *image*, then generate music from that caption.

	    Args:
	        image: The value delivered by the image input component; ``None``
	            when nothing was uploaded.

	    Returns:
	        A ``(caption, audio_path)`` tuple: the caption text and the path
	        returned by ``generate_music``.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # Without this guard a missing upload would fail later with an opaque
	    # AttributeError when the captioning step tries to save the image.
	    if image is None:
	        raise gr.Error("이미지를 먼저 업로드하세요.")

	    caption = generate_caption(image)
	    audio = generate_music(f"A cheerful melody inspired by: {caption}")
	    return caption, audio

	# ───────────────────────────────────────────────
	# 5. Gradio UI
	# ───────────────────────────────────────────────
	# Build the interface: one image input, a text output for the caption and
	# an audio output for the generated music.
	demo = gr.Interface(
	    fn=process,
	    inputs=gr.Image(type="pil"),
	    outputs=[
	        gr.Text(label="AI가 생성한 그림 설명"),
	        gr.Audio(label="생성된 AI 음악 (MusicGen)"),
	    ],
	    title="🎨 AI 그림-음악 생성기 (Inference API)",
	    description=(
	        "이미지를 업로드하면 BLIP-base가 설명을 만들고, "
	        "해당 설명으로 MusicGen-small이 10초 음악을 생성합니다."
	    ),
	)
	# Enable the request queue; ``demo`` is rebound to whatever queue() returns.
	demo = demo.queue()

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()