Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,70 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
-
|
3 |
os.environ["HF_FORCE_SAFE_SERIALIZATION"] = "1"
|
4 |
-
os.environ["XFORMERS_FORCE_DISABLE"] = "1" #
|
5 |
|
6 |
-
# ── ✨ Insert a dummy xformers module
|
7 |
dummy = types.ModuleType("xformers")
|
8 |
dummy.ops = types.ModuleType("xformers.ops") # audiocraftκ° ops νμλͺ¨λλ μ°Ύμ
|
9 |
sys.modules["xformers"] = dummy
|
10 |
sys.modules["xformers.ops"] = dummy.ops
|
11 |
-
#
|
12 |
|
13 |
-
# ── Install audiocraft on demand
|
14 |
try:
|
15 |
from audiocraft.models import MusicGen
|
16 |
except ModuleNotFoundError:
|
17 |
subprocess.check_call([
|
18 |
sys.executable, "-m", "pip", "install",
|
19 |
-
"git+https://github.com/facebookresearch/audiocraft@main",
|
|
|
20 |
])
|
21 |
from audiocraft.models import MusicGen
|
22 |
|
23 |
-
|
24 |
import gradio as gr
|
25 |
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
|
26 |
-
from audiocraft.models import MusicGen
|
27 |
from audiocraft.data.audio import audio_write
|
28 |
from PIL import Image
|
29 |
import torch
|
30 |
-
import tempfile
|
31 |
|
32 |
-
# ───── Load the image-captioning model
|
33 |
caption_model = VisionEncoderDecoderModel.from_pretrained(
|
34 |
"nlpconnect/vit-gpt2-image-captioning",
|
35 |
-
use_safetensors=True,
|
36 |
-
low_cpu_mem_usage=True
|
37 |
)
|
38 |
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
|
39 |
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
|
40 |
|
41 |
-
# ───── Load the MusicGen model
|
42 |
musicgen = MusicGen.get_pretrained("facebook/musicgen-small")
|
43 |
-
musicgen.set_generation_params(duration=10) #
|
44 |
|
45 |
-
# ───── Image → descriptive sentence generation
|
46 |
-
def generate_caption(image):
|
47 |
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
48 |
output_ids = caption_model.generate(pixel_values, max_length=50)
|
49 |
-
|
50 |
-
return caption
|
51 |
|
52 |
-
# ───── Description → music generation
|
53 |
-
def generate_music(prompt):
|
54 |
-
wav = musicgen.generate([prompt]) # batch size 1
|
55 |
tmp_dir = tempfile.mkdtemp()
|
56 |
audio_path = os.path.join(tmp_dir, "musicgen_output.wav")
|
57 |
audio_write(audio_path, wav[0], musicgen.sample_rate, strategy="loudness")
|
58 |
return audio_path
|
59 |
|
60 |
-
# ───── Full pipeline
|
61 |
-
def process(image):
|
62 |
caption = generate_caption(image)
|
63 |
prompt = f"A cheerful melody inspired by: {caption}"
|
64 |
audio_path = generate_music(prompt)
|
65 |
return caption, audio_path
|
66 |
|
67 |
-
# ───── Gradio interface
|
68 |
demo = gr.Interface(
|
69 |
fn=process,
|
70 |
inputs=gr.Image(type="pil"),
|
@@ -72,8 +74,8 @@ demo = gr.Interface(
|
|
72 |
gr.Text(label="AI가 생성한 그림 설명"),
|
73 |
gr.Audio(label="생성된 AI 음악 (MusicGen)")
|
74 |
],
|
75 |
-
title="🎨 AI
|
76 |
-
description="그림을 업로드하면 AI가 설명을 만들고, 설명을 바탕으로 음악을
|
77 |
)
|
78 |
|
79 |
if __name__ == "__main__":
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import types
|
4 |
+
import subprocess
|
5 |
+
import tempfile
|
6 |
|
7 |
+
# ── Environment variable settings ──────────────────────────────────────────────
|
8 |
# Environment flags are set before the Hugging Face / audiocraft imports
# further down in the file.
# NOTE(review): neither variable name is checked here against the library
# that is supposed to read it — confirm both are actually honored.
os.environ["HF_FORCE_SAFE_SERIALIZATION"] = "1"
os.environ["XFORMERS_FORCE_DISABLE"] = "1"  # intended to disable xformers

# ── Insert a dummy xformers module ─────────────────────────────────────────────
# Register empty stand-in modules in sys.modules so that a later
# `import xformers` / `import xformers.ops` resolves to these stubs instead of
# going through the normal module search.
dummy = types.ModuleType("xformers")
dummy.ops = types.ModuleType("xformers.ops")  # audiocraft also looks for the ops submodule
sys.modules["xformers"] = dummy
sys.modules["xformers.ops"] = dummy.ops
|
16 |
+
# ───────────────────────────────────────────────────────────────
|
17 |
|
18 |
+
# ── Install audiocraft on demand ───────────────────────────────────────
|
19 |
# Import MusicGen; if the import fails with ModuleNotFoundError, install
# audiocraft from its GitHub main branch with pip and retry the import once.
try:
    from audiocraft.models import MusicGen
except ModuleNotFoundError:
    import importlib

    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "git+https://github.com/facebookresearch/audiocraft@main",
        "--use-pep517",  # build the package through the PEP 517 interface
    ])
    # Packages installed while the interpreter is running can be missed by
    # the import system's cached finders; refresh them before retrying.
    importlib.invalidate_caches()
    from audiocraft.models import MusicGen
|
28 |
|
|
|
29 |
import gradio as gr
|
30 |
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
|
|
|
31 |
from audiocraft.data.audio import audio_write
|
32 |
from PIL import Image
|
33 |
import torch
|
|
|
34 |
|
35 |
+
# ───── Load the image-captioning model ─────────────────────────────────
|
36 |
# The captioning model, its image processor and its tokenizer are all loaded
# from the same checkpoint, so the id is spelled out once.
CAPTION_MODEL_ID = "nlpconnect/vit-gpt2-image-captioning"

caption_model = VisionEncoderDecoderModel.from_pretrained(
    CAPTION_MODEL_ID,
    use_safetensors=True,
    low_cpu_mem_usage=True,
)
feature_extractor = ViTImageProcessor.from_pretrained(CAPTION_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(CAPTION_MODEL_ID)
|
43 |
|
44 |
+
# ───── Load the MusicGen model ─────────────────────────────────────
|
45 |
# Load the small MusicGen checkpoint once at import time and fix the clip
# length used by every later generate() call.
musicgen = MusicGen.get_pretrained("facebook/musicgen-small")
musicgen.set_generation_params(duration=10)  # length of the generated music (seconds)
|
47 |
|
48 |
+
# ───── Image → descriptive sentence generation function ────────────────────────────
|
49 |
+
def generate_caption(image: Image.Image) -> str:
    """Describe ``image`` in one sentence with the image-captioning model.

    Args:
        image: PIL image to caption.

    Returns:
        The decoded caption, with special tokens removed and surrounding
        whitespace stripped.
    """
    # NOTE(review): the checkpoint's published usage example converts
    # non-RGB input (RGBA, grayscale, palette) to RGB before preprocessing;
    # mirrored here so such images do not reach the processor unchanged.
    if image.mode != "RGB":
        image = image.convert("RGB")
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    output_ids = caption_model.generate(pixel_values, max_length=50)
    # Only the first (and only) sequence of the batch is decoded.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
|
|
|
53 |
|
54 |
+
# ───── Description → music generation function ──────────────────────────────────
|
55 |
+
def generate_music(prompt: str) -> str:
    """Generate a clip for ``prompt`` with MusicGen and save it to disk.

    Args:
        prompt: Text description passed to MusicGen.

    Returns:
        Path of the written audio file, located in a freshly created
        temporary directory.
    """
    wav = musicgen.generate([prompt])  # batch size = 1
    tmp_dir = tempfile.mkdtemp()
    # audio_write takes a *stem* name and appends the format suffix itself,
    # then returns the path it actually wrote. Passing a name that already
    # ends in ".wav" would produce "….wav.wav", so hand over the bare stem
    # and return the reported path instead of a hand-built one.
    stem = os.path.join(tmp_dir, "musicgen_output")
    # NOTE(review): .cpu() follows audiocraft's own usage example; it is a
    # no-op when the model already runs on CPU.
    written = audio_write(stem, wav[0].cpu(), musicgen.sample_rate, strategy="loudness")
    return str(written)
|
61 |
|
62 |
+
# ───── Full pipeline ────────────────────────────────────────
|
63 |
+
def process(image: Image.Image):
    """Run the image → caption → music pipeline behind the Gradio interface.

    Args:
        image: Uploaded picture as a PIL image. Gradio passes ``None`` when
            the form is submitted without an image.

    Returns:
        A ``(caption, audio_path)`` tuple: the generated description and the
        path of the generated audio file.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an opaque failure
        # inside the image processor. Text: "Please upload an image first."
        raise gr.Error("이미지를 먼저 업로드해 주세요.")
    caption = generate_caption(image)
    prompt = f"A cheerful melody inspired by: {caption}"
    audio_path = generate_music(prompt)
    return caption, audio_path
|
68 |
|
69 |
+
# ───── Gradio interface ─────────────────────────────────────
|
70 |
demo = gr.Interface(
|
71 |
fn=process,
|
72 |
inputs=gr.Image(type="pil"),
|
|
|
74 |
gr.Text(label="AI가 생성한 그림 설명"),
|
75 |
gr.Audio(label="생성된 AI 음악 (MusicGen)")
|
76 |
],
|
77 |
+
title="🎨 AI 그림-음악 생성기",
|
78 |
+
description="그림을 업로드하면 AI가 설명을 만들고, 설명을 바탕으로 음악을 생성해 들려줍니다."
|
79 |
)
|
80 |
|
81 |
if __name__ == "__main__":
|