Spaces:
Runtime error
Runtime error
File size: 2,289 Bytes
2bd9468 a6d7b81 2bd9468 9902a40 8770d52 2bd9468 ebcd803 3377e03 a6d7b81 87c119f 3377e03 8770d52 a6d7b81 3377e03 ebcd803 a6d7b81 ebcd803 a6d7b81 ebcd803 ad7babb a6d7b81 23708c8 3377e03 23708c8 e6812f7 872e164 11179ad bc25881 23708c8 2bd9468 ad7babb 3382a71 a6d7b81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import gradio as gr
from transformers import pipeline
from gradio_client import Client  # Assumption: the gradio_client library is available.
# Load the image-classification pipeline once, at import time, using the
# "google/vit-base-patch16-224" checkpoint.
image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
def generate_music(
    prompt,
    *,
    duration=5,
    guidance_scale=0,
    seed=5,
    n_candidates=1,
):
    """Request a music clip for *prompt* from the hosted AudioLDM2 Space.

    Args:
        prompt: Text describing the music to generate.
        duration: Length of the clip, in seconds.
        guidance_scale: Guidance scale forwarded to the Space.
        seed: Seed value forwarded to the Space.
        n_candidates: Number of waveforms the Space is asked to generate.

    Returns:
        Whatever ``Client.predict`` returns for this endpoint, unmodified.
    """
    # A fresh client is created on every call.
    client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
    return client.predict(
        prompt,
        # The prompt is deliberately sent a second time, as in the original.
        # NOTE(review): on this Space the second positional input looks like
        # the negative-prompt slot -- confirm against the Space's API page.
        prompt,
        duration,
        guidance_scale,
        seed,
        n_candidates,
        fn_index=1,  # index of the target function on the Space
    )
def generate_voice(prompt, *, steps=100, guidance_scale=1):
    """Request an audio clip for *prompt* from the hosted Tango Space.

    Args:
        prompt: Text prompt; the caller in this file passes the image
            classification label.
        steps: "Steps" value forwarded to the Space.
        guidance_scale: "Guidance Scale" value forwarded to the Space.

    Returns:
        Whatever ``Client.predict`` returns for this endpoint, unmodified.
        NOTE(review): presumably a path or URL to the generated audio --
        confirm against the Space's API page.
    """
    # A fresh client is created on every call.
    client = Client("https://declare-lab-tango.hf.space/")
    return client.predict(
        prompt,
        steps,
        guidance_scale,
        api_name="/predict",  # API endpoint path on the Space
    )
def classify_and_generate_voice(uploaded_image):
    """Classify an uploaded image, then generate voice and music for its label.

    Args:
        uploaded_image: Image supplied by the Gradio input component, or
            ``None`` when the form is submitted without an image.

    Returns:
        A ``(label, voice_result, music_result)`` tuple: the label of the
        first classifier prediction, followed by the unmodified results of
        ``generate_voice`` and ``generate_music``.

    Raises:
        gr.Error: If no image was provided.
    """
    # Surface a readable message in the UI instead of failing inside the
    # classifier when nothing was uploaded.
    if uploaded_image is None:
        raise gr.Error("Please upload an image first.")

    predictions = image_model(uploaded_image)
    # The first entry is used as the top (highest-probability) prediction.
    top_prediction = predictions[0]["label"]

    # The label itself is the voice prompt; the music prompt wraps it.
    voice_result = generate_voice(top_prediction)
    music_result = generate_music("Generate music for: " + top_prediction)

    # Results are handed to the Gradio outputs as returned by the remote APIs.
    return top_prediction, voice_result, music_result
# Build the Gradio interface: one image in; a label and two audio clips out
# (voice first, then music, matching the handler's return order).
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    title="msVision_3",
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성을 생성합니다.(recognizes the object and generate voice)",
    # NOTE(review): assumes these example files exist in the app's working
    # directory -- confirm they are committed alongside this file.
    examples=["dog.jpg", "cat.png", "cafe.jpg"],
)

# Start the interface.
iface.launch()
|