Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
from gradio_client import Client | |
# Load the image-classification pipeline once at import time so every
# request reuses the same model instance.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt, *, duration=5, guidance_scale=0, seed=5, num_waveforms=1):
    """Generate audio for *prompt* via the hosted AudioLDM text-to-audio Space.

    Args:
        prompt: Text sent to the Space's 'Input your text here' textbox.
        duration: 'Duration (seconds)' slider value (documented range 5-15).
        guidance_scale: 'Guidance scale' slider value (documented range 0-6).
        seed: 'Seed' number component value.
        num_waveforms: 'Number waveforms to generate' slider value
            (documented range 1-3).

    Returns:
        Whatever the Space's ``/text2audio`` endpoint returns.
        NOTE(review): presumably a path to the generated media file -- confirm
        against the Space's API page before wiring it to a ``gr.Audio`` output.
    """
    client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
    # Arguments are passed positionally, in the order of the Space's input
    # components, matching how generate_voice calls its endpoint. Positional
    # arguments are accepted by every gradio_client release, whereas keyword
    # arguments are rejected by older ones.
    result = client.predict(
        prompt,          # previously hard-coded to "Howdy!", ignoring the caller
        duration,
        guidance_scale,
        seed,
        num_waveforms,
        api_name="/text2audio",
    )
    # Kept for debugging: shows the raw endpoint response in the console.
    print(result)
    # The original fell off the end and returned None, so the caller never
    # received the generated music.
    return result
def generate_voice(prompt):
    """Generate audio for *prompt* using the hosted Tango Space.

    Args:
        prompt: Text prompt; the caller builds it from the image
            classification result.

    Returns:
        The raw value returned by the Space's ``/predict`` endpoint.
        NOTE(review): presumably an audio file path or URL -- confirm against
        the Space's API page.
    """
    steps = 100           # 'Steps' input of the Tango endpoint
    guidance_scale = 1    # 'Guidance Scale' input of the Tango endpoint

    tango = Client("https://declare-lab-tango.hf.space/")
    return tango.predict(prompt, steps, guidance_scale, api_name="/predict")
def classify_and_generate_voice(uploaded_image):
    """Classify an image, then generate voice and music from its top label.

    Args:
        uploaded_image: Image passed straight to ``image_model``.

    Returns:
        A 3-tuple ``(label, voice_result, music_result)`` in the order the
        Gradio outputs expect: the top predicted label, the value returned by
        ``generate_voice`` and the value returned by ``generate_music``.
    """
    # The first entry of the pipeline output is used as the top prediction.
    label = image_model(uploaded_image)[0]['label']

    # Build both text prompts from the predicted label.
    voice_prompt = "this is " + label
    music_prompt = "The rnb beat of 85BPM drums." + label + "."

    # Voice first, then music -- same call order as before.
    voice = generate_voice(voice_prompt)
    music = generate_music(music_prompt)

    return label, voice, music
# Create the Gradio interface: one image input, and three outputs (the
# predicted label, the generated voice clip and the generated music clip).
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    title="msVision_3",
    # The Korean part of this user-facing text was mojibake (UTF-8 bytes
    # decoded with a Greek single-byte codec); restored to the intended text.
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성 및 음악을 생성합니다.(recognizes object and generate Voice&Music)",
    examples=["dog.jpg", "cafe.jpg", "seoul.png"],
)

# Run the interface.
iface.launch()