Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
from gradio_client import Client | |
# Load the image-recognition pipeline once at import time; it is reused for
# every classification request handled by the app.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt):
    """Request audio for *prompt* from the hosted AudioLDM 48k Space.

    A new ``Client`` is opened for the Space on every call, and its
    ``predict`` method is invoked with keyword arguments only; no
    ``api_name`` is passed.

    NOTE(review): the keyword names below assume the endpoint takes prompt,
    duration, guidance_scale, seed and num_waveforms -- confirm against the
    Space's published API, and pass ``api_name`` (e.g. "/text2audio") if the
    endpoint turns out to require it.

    Args:
        prompt: Text description of the audio to generate.

    Returns:
        Whatever ``Client.predict`` returns for this Space, unmodified.
    """
    space_url = "https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/"
    request_fields = {
        "prompt": prompt,
        "duration": 5,
        "guidance_scale": 5.5,
        "seed": 5,
        "num_waveforms": 3,
    }
    audio = Client(space_url).predict(**request_fields)
    # Echo the raw response so its shape can be inspected in the logs.
    print(audio)
    return audio
def generate_voice(prompt):
    """Generate a voice clip for *prompt* using the hosted Tango Space.

    A new ``Client`` is opened for the Space on every call and its
    ``/predict`` endpoint is invoked with the prompt followed by two fixed
    settings.

    Args:
        prompt: Text to send to the Space; the caller builds it from the
            image-classification result.

    Returns:
        Whatever ``Client.predict`` returns, unmodified. Any post-processing
        (e.g. extracting an audio file URL or data from the result) is left
        to the caller.
    """
    tango = Client("https://declare-lab-tango.hf.space/")
    steps = 100
    guidance_scale = 1
    # api_name selects the endpoint path on the Space.
    return tango.predict(prompt, steps, guidance_scale, api_name="/predict")
def classify_and_generate_voice(uploaded_image):
    """Classify an image, then generate a voice clip and music for its label.

    The label of the first prediction returned by ``image_model`` is used to
    build two text prompts: one passed to ``generate_voice`` and one passed
    to ``generate_music`` (called in that order).

    Args:
        uploaded_image: The image handed over by the Gradio interface.

    Returns:
        A 3-tuple ``(label, voice_result, music_result)`` whose last two
        items are the unmodified return values of ``generate_voice`` and
        ``generate_music``.
    """
    # The first entry is taken as the top-ranked classification result.
    ranked = image_model(uploaded_image)
    label = ranked[0]['label']

    spoken_prompt = "this is " + label
    # NOTE(review): there is no space between "drums." and the label in the
    # music prompt -- kept as-is; confirm whether that is intentional.
    music_prompt = "The rnb beat of 85BPM drums." + label + "."

    voice = generate_voice(spoken_prompt)
    music = generate_music(music_prompt)

    # The raw results are handed straight to the Gradio output components.
    return label, voice, music
# Build the Gradio interface: one PIL image in; a label and two audio
# components out, matching the 3-tuple returned by the handler.
iface = gr.Interface(
    title="msVision_3",
    description="μ΄λ―Έμ§λ₯Ό μ λ‘λνλ©΄, μ¬λ¬Όμ μΈμνκ³ ν΄λΉνλ μμ± λ° μμ μ μμ±ν©λλ€.(recognizes object and generate Voice&Music)",
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    examples=["dog.jpg", "cafe.jpg", "seoul.png"],
)

# Launch the interface.
iface.launch()