yongyeol committed on
Commit
d2e22b1
·
verified ·
1 Parent(s): 855f2cd

Update app.py

Files changed (1)
  1. app.py +26 -45
app.py CHANGED
@@ -1,62 +1,43 @@
- import os, io, base64, tempfile, requests
  import gradio as gr
  from PIL import Image
 
- # ───────────────────────────────────────────────
- # 1. HF Inference API setup
- # ───────────────────────────────────────────────
- HF_TOKEN = os.getenv("HF_TOKEN")  # Spaces → Settings → Secrets
- if not HF_TOKEN:
-     raise RuntimeError("The HF_TOKEN secret is missing. Register it under Settings → Secrets.")
 
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
- CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
- MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
 
- # ───────────────────────────────────────────────
- # 2. Caption generation function
- # ───────────────────────────────────────────────
- def generate_caption(image_pil: Image.Image) -> str:
-     buf = io.BytesIO()
-     image_pil.save(buf, format="PNG")
-     resp = requests.post(CAPTION_API, headers=HEADERS, data=buf.getvalue(), timeout=60)
-     resp.raise_for_status()
-     return resp.json()[0]["generated_text"]
-
- # ───────────────────────────────────────────────
- # 3. Music generation function
- # ───────────────────────────────────────────────
- def generate_music(prompt: str, duration: int = 10) -> str:
      payload = {"inputs": prompt, "parameters": {"duration": duration}}
-     resp = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
-     resp.raise_for_status()
      tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-     tmp.write(resp.content)
      tmp.close()
      return tmp.name
 
- # ───────────────────────────────────────────────
- # 4. Full pipeline
- # ───────────────────────────────────────────────
- def process(image):
-     caption = generate_caption(image)
-     audio = generate_music(f"A cheerful melody inspired by: {caption}")
-     return caption, audio
 
- # ───────────────────────────────────────────────
- # 5. Gradio UI
- # ───────────────────────────────────────────────
  demo = gr.Interface(
      fn=process,
      inputs=gr.Image(type="pil"),
-     outputs=[
-         gr.Text(label="AI-generated image description"),
-         gr.Audio(label="Generated AI music (MusicGen)")
-     ],
-     title="🎨 AI Image-to-Music Generator (Inference API)",
-     description="Upload an image and BLIP-base generates a description; "
-                 "MusicGen-small then creates 10 seconds of music from it."
- ).queue()  # ★ enable the queue via chaining if needed
 
  if __name__ == "__main__":
      demo.launch()
 
+ import os, tempfile, requests
  import gradio as gr
  from PIL import Image
+ from transformers import pipeline
 
+ # ────────────────────── 1. Captioning pipeline ──────────────────────
+ caption_pipe = pipeline(
+     "image-to-text",
+     model="Salesforce/blip-image-captioning-base",  # swap in a tiny model by editing only this line
+     device=-1,  # -1 → CPU, 0 or higher → GPU ID (keep -1 on a CPU Space)
+ )
 
+ # ────────────────────── 2. MusicGen (Inference API) ─────────────────
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
+ MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
 
+ def generate_music(prompt: str, duration=10) -> str:
      payload = {"inputs": prompt, "parameters": {"duration": duration}}
+     r = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
+     r.raise_for_status()
      tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+     tmp.write(r.content)
      tmp.close()
      return tmp.name
 
+ # ────────────────────── 3. Full pipeline ──────────────────────
+ def process(image: Image.Image):
+     caption = caption_pipe(image)[0]["generated_text"]
+     music = generate_music(f"A cheerful melody inspired by: {caption}")
+     return caption, music
 
+ # ────────────────────── 4. Gradio UI ────────────────────────────
  demo = gr.Interface(
      fn=process,
      inputs=gr.Image(type="pil"),
+     outputs=[gr.Text(), gr.Audio()],
+     title="🎨 Local BLIP-base + MusicGen API",
+     description="Generates a caption with BLIP-base on the CPU, then passes that caption to the MusicGen-small Inference API to create 10 seconds of music."
+ ).queue()
 
  if __name__ == "__main__":
      demo.launch()
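
To sanity-check the new local captioning stage outside of Gradio, a minimal sketch along these lines should work; "sample.jpg" is a hypothetical placeholder for any local test image, and the pipeline call mirrors the one added in app.py:

# Minimal smoke test for the local BLIP captioning pipeline (sketch;
# "sample.jpg" is a placeholder path, not a file shipped with the Space).
from PIL import Image
from transformers import pipeline

caption_pipe = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,  # CPU, matching the Space's configuration
)
result = caption_pipe(Image.open("sample.jpg"))
print(result[0]["generated_text"])  # the caption string later handed to generate_music()

The pipeline returns a list of dicts with a "generated_text" key, which is why process() indexes [0]["generated_text"] before building the MusicGen prompt.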