yongyeol committed on
Commit
d2e22b1
·
verified ·
1 Parent(s): 855f2cd

Update app.py

Files changed (1)
  1. app.py +26 -45
app.py CHANGED
@@ -1,62 +1,43 @@
- import os, io, base64, tempfile, requests
  import gradio as gr
  from PIL import Image
 
- # ───────────────────────────────────────────────
- # 1. HF Inference API setup
- # ───────────────────────────────────────────────
- HF_TOKEN = os.getenv("HF_TOKEN")  # Spaces → Settings → Secrets
- if not HF_TOKEN:
-     raise RuntimeError("The HF_TOKEN secret is missing. Register it under Settings → Secrets.")
 
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
- CAPTION_API = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
- MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
 
- # ───────────────────────────────────────────────
- # 2. Caption generation function
- # ───────────────────────────────────────────────
- def generate_caption(image_pil: Image.Image) -> str:
-     buf = io.BytesIO()
-     image_pil.save(buf, format="PNG")
-     resp = requests.post(CAPTION_API, headers=HEADERS, data=buf.getvalue(), timeout=60)
-     resp.raise_for_status()
-     return resp.json()[0]["generated_text"]
-
- # ───────────────────────────────────────────────
- # 3. Music generation function
- # ───────────────────────────────────────────────
- def generate_music(prompt: str, duration: int = 10) -> str:
      payload = {"inputs": prompt, "parameters": {"duration": duration}}
-     resp = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
-     resp.raise_for_status()
      tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-     tmp.write(resp.content)
      tmp.close()
      return tmp.name
 
- # ───────────────────────────────────────────────
- # 4. Full pipeline
- # ───────────────────────────────────────────────
- def process(image):
-     caption = generate_caption(image)
-     audio = generate_music(f"A cheerful melody inspired by: {caption}")
-     return caption, audio
 
- # ───────────────────────────────────────────────
- # 5. Gradio UI
- # ───────────────────────────────────────────────
  demo = gr.Interface(
      fn=process,
      inputs=gr.Image(type="pil"),
-     outputs=[
-         gr.Text(label="AI-generated image description"),
-         gr.Audio(label="Generated AI music (MusicGen)")
-     ],
-     title="🎨 AI Image-to-Music Generator (Inference API)",
-     description="Upload an image and BLIP-base generates a description; "
-                 "MusicGen-small then creates 10 seconds of music from it."
- ).queue()  # ★ enable the queue via chaining if needed
 
  if __name__ == "__main__":
      demo.launch()
 
+ import os, tempfile, requests
  import gradio as gr
  from PIL import Image
+ from transformers import pipeline
 
+ # ────────────────────── 1. Captioning pipeline ──────────────────────
+ caption_pipe = pipeline(
+     "image-to-text",
+     model="Salesforce/blip-image-captioning-base",  # swap in a tiny model by editing only this line
+     device=-1,  # -1 → CPU, 0 or higher → GPU ID (keep -1 on a CPU Space)
+ )
 
+ # ────────────────────── 2. MusicGen (Inference API) ─────────────────
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
+ MUSIC_API = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
 
+ def generate_music(prompt: str, duration=10) -> str:
      payload = {"inputs": prompt, "parameters": {"duration": duration}}
+     r = requests.post(MUSIC_API, headers=HEADERS, json=payload, timeout=120)
+     r.raise_for_status()
      tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+     tmp.write(r.content)
      tmp.close()
      return tmp.name
 
+ # ────────────────────── 3. Full pipeline ──────────────────────
+ def process(image: Image.Image):
+     caption = caption_pipe(image)[0]["generated_text"]
+     music = generate_music(f"A cheerful melody inspired by: {caption}")
+     return caption, music
 
+ # ────────────────────── 4. Gradio UI ────────────────────────────
  demo = gr.Interface(
      fn=process,
      inputs=gr.Image(type="pil"),
+     outputs=[gr.Text(), gr.Audio()],
+     title="🎨 Local BLIP-base + MusicGen API",
+     description="Generates a caption with BLIP-base on the CPU, then passes that caption to the MusicGen-small Inference API to create 10 seconds of music."
+ ).queue()
 
  if __name__ == "__main__":
      demo.launch()
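
To sanity-check the new local captioning stage outside of Gradio, a minimal sketch along these lines should work; "sample.jpg" is a hypothetical placeholder for any local test image, and the pipeline call mirrors the one added in app.py:

# Minimal smoke test for the local BLIP captioning pipeline (sketch;
# "sample.jpg" is a placeholder path, not a file shipped with the Space).
from PIL import Image
from transformers import pipeline

caption_pipe = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,  # CPU, matching the Space's configuration
)
result = caption_pipe(Image.open("sample.jpg"))
print(result[0]["generated_text"])  # the caption string later handed to generate_music()

The pipeline returns a list of dicts with a "generated_text" key, which is why process() indexes [0]["generated_text"] before building the MusicGen prompt.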