Spaces:
Runtime error
Runtime error
File size: 2,820 Bytes
343dde8 6748e07 78ea8dc d2e22b1 bfbdf81 343dde8 d2e22b1 343dde8 d2e22b1 78ea8dc 343dde8 87e6f23 6748e07 343dde8 6748e07 20017db 343dde8 4b414b1 343dde8 4b414b1 343dde8 d2e22b1 2b108d4 8e74b09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import os, tempfile, soundfile as sf
import gradio as gr
from PIL import Image
from transformers import pipeline
# ────────────────────────────────────────────────
# 1. Load the pipelines (CPU: device=-1)
# ────────────────────────────────────────────────
CAPTION_ID = "Salesforce/blip-image-captioning-base"  # smaller option: blip-image-captioning
MUSIC_ID = "facebook/musicgen-melody"  # smaller option: musicgen-small

# Target clip length in seconds.
MUSIC_SECONDS = 10
# MusicGen's decoder emits roughly 50 audio tokens per second of output
# (see the transformers MusicGen docs), so the clip length is expressed as a
# token budget; `duration` is not an argument that `generate()` understands.
MUSICGEN_TOKENS_PER_SECOND = 50

# Image -> caption model, pinned to CPU.
caption_pipe = pipeline(
    "image-to-text",
    model=CAPTION_ID,
    device=-1,
)

# Text -> music model, pinned to CPU.
music_pipe = pipeline(
    "text-to-audio",
    model=MUSIC_ID,
    device=-1,
    generate_kwargs={"max_new_tokens": MUSIC_SECONDS * MUSICGEN_TOKENS_PER_SECOND},
)
# ────────────────────────────────────────────────
# 2. Utility functions
# ────────────────────────────────────────────────
def generate_caption(img: Image.Image) -> str:
    """Return the caption text produced by the captioning pipeline for *img*.

    The pipeline result is indexed as a list of dicts; the
    ``"generated_text"`` field of the first entry is returned.
    """
    predictions = caption_pipe(img)
    first = predictions[0]
    return first["generated_text"]
def generate_music(prompt: str) -> str:
    """Generate music for *prompt* and return the path of a WAV file.

    Args:
        prompt: Text description passed to the text-to-audio pipeline.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``, so it is not removed here.
    """
    import numpy as np

    # NOTE(review): depending on the transformers version, parameters passed
    # at call time may replace the generate_kwargs given when the pipeline
    # was constructed -- confirm the configured clip length is honoured.
    result = music_pipe(prompt, forward_params={"do_sample": True})

    # A single prompt yields one dict; a batched call yields a list of dicts.
    # Accept both instead of indexing a dict with 0 (KeyError).
    if isinstance(result, (list, tuple)):
        result = result[0]
    audio, sr = result["audio"], result["sampling_rate"]

    # Drop batch/singleton axes: soundfile expects (frames,) or
    # (frames, channels), not (batch, channels, frames).
    audio = np.squeeze(np.asarray(audio))
    if audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        # Still channels-first (multi-channel audio): put frames first.
        audio = audio.T

    # Only the unique name is needed; close the handle right away so it is
    # not leaked and so the path can be reopened for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name
    sf.write(wav_path, audio, sr)
    return wav_path
# ────────────────────────────────────────────────
# 3. Full pipeline
# ────────────────────────────────────────────────
def process(image):
    """Caption *image*, then generate music from that caption.

    Returns a ``(caption, audio)`` pair, where ``audio`` is whatever
    ``generate_music`` returns for the caption-based prompt.
    """
    description = generate_caption(image)
    music_prompt = f"A cheerful melody inspired by: {description}"
    return description, generate_music(music_prompt)
# ────────────────────────────────────────────────
# 4. Gradio UI
# ────────────────────────────────────────────────
# Gradio front end: one image in, caption text and generated audio out.
# The user-facing strings are Korean; they were mis-encoded and split across
# lines mid-literal, so they are restored here as valid single-line UTF-8
# literals.
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Text(label="AI가 생성한 그림 설명"),  # "AI-generated picture description"
        gr.Audio(label="생성된 AI 음악 (MusicGen)"),  # "Generated AI music (MusicGen)"
    ],
    title="🎨 로컬 BLIP-base + MusicGen-melody",  # "Local BLIP-base + MusicGen-melody"
    # "Upload an image and BLIP-base writes a description; MusicGen-melody
    # then makes 10 seconds of music from that description."
    description=(
        "이미지를 업로드하면 BLIP-base가 설명을 생성하고, "
        "그 설명으로 MusicGen-melody가 10초 음악을 만듭니다."
    ),
).queue()

if __name__ == "__main__":
    demo.launch()
|