imagetoaudio / app.py
yongyeol's picture
Update app.py
2b108d4 verified
raw
history blame
4.37 kB
import base64
import io
import os
import tempfile

import gradio as gr
import requests
from PIL import Image

# ─────────────────────────────────────────────────────────────
# 1. Environment variables & HF Inference API configuration
# ─────────────────────────────────────────────────────────────
# The token must be registered as a secret in the Space settings;
# failing fast here gives a clearer error than a 401 on first request.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN λΉ„λ°€ 값이 μ„€μ •λ˜μ–΄ μžˆμ§€ μ•ŠμŠ΅λ‹ˆλ‹€. Spaces Settings β†’ Secretsμ—μ„œ 등둝해 μ£Όμ„Έμš”.")

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
# Hosted Inference API endpoints: image captioning and music generation.
CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
# ─────────────────────────────────────────────────────────────
# 2. Image captioning (BLIP-base via Inference API)
# ─────────────────────────────────────────────────────────────
def generate_caption(image_pil: Image.Image) -> str:
    """Return an English caption for *image_pil* from the hosted BLIP-base model.

    The image is serialized to PNG and sent as the raw request body
    (binary-upload style expected by the image-to-text task).

    Raises:
        requests.HTTPError: if the API call fails (via raise_for_status).
        RuntimeError: if the API returns an error payload instead of
            the expected caption list (e.g. model still loading).
    """
    buf = io.BytesIO()
    image_pil.save(buf, format="PNG")
    response = requests.post(CAPTION_API, headers=HEADERS, data=buf.getvalue(), timeout=60)
    response.raise_for_status()
    result = response.json()
    # Success shape: [{"generated_text": "..."}]. A dict here means the
    # API answered with an error object rather than a caption list.
    if isinstance(result, dict):
        raise RuntimeError(result.get("error", "Unexpected response from caption API"))
    return result[0]["generated_text"]
# ─────────────────────────────────────────────────────────────
# 3. MusicGen-small music generation (default 10 s, via Inference API)
# ─────────────────────────────────────────────────────────────
def generate_music(prompt: str, duration: int = 10) -> str:
    """Generate a short music clip for *prompt* and return a WAV file path.

    Args:
        prompt: text description fed to MusicGen-small.
        duration: requested clip length in seconds (forwarded as an API
            parameter; presumably capped by the model — confirm against
            the endpoint docs).

    Returns:
        Path to a temporary ``.wav`` file holding the generated audio.

    Raises:
        requests.HTTPError: if the API call fails.
    """
    payload = {"inputs": prompt, "parameters": {"duration": duration}}
    response = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
    response.raise_for_status()
    # The API responds with raw WAV bytes; persist them (delete=False) so
    # Gradio can serve the file after this function returns. The context
    # manager guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(response.content)
        return tmp.name
# ─────────────────────────────────────────────────────────────
# 4. Full pipeline: image → caption → music
# ─────────────────────────────────────────────────────────────
def process(image):
    """Caption the uploaded image, then turn that caption into music.

    Returns a ``(caption, audio_path)`` tuple: the BLIP caption text and
    the path of the WAV file produced by MusicGen.
    """
    description = generate_caption(image)
    music_prompt = f"A cheerful melody inspired by: {description}"
    audio_path = generate_music(music_prompt)
    return description, audio_path
# ─────────────────────────────────────────────────────────────
# 5. Gradio interface
# ─────────────────────────────────────────────────────────────
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Text(label="AIκ°€ μƒμ„±ν•œ κ·Έλ¦Ό μ„€λͺ…"),
        gr.Audio(label="μƒμ„±λœ AI μŒμ•… (MusicGen)"),
    ],
    title="🎨 AI κ·Έλ¦Ό-μŒμ•… 생성기 (Inference API 버전)",
    description="이미지λ₯Ό μ—…λ‘œλ“œν•˜λ©΄ BLIP-baseκ°€ μ„€λͺ…을 μƒμ„±ν•˜κ³ , "
                "ν•΄λ‹Ή μ„€λͺ…μœΌλ‘œ MusicGen-small이 10초 μŒμ•…μ„ λ§Œλ“­λ‹ˆλ‹€.",
)

if __name__ == "__main__":
    # Queueing is configured on the Blocks/Interface object, not via a
    # constructor keyword: gr.Interface() does not accept `queue=...`,
    # so passing it raises TypeError at startup.
    demo.queue().launch()