import torch

# Sanity check: report the CUDA build PyTorch was compiled against
# (prints None on a CPU-only runtime).
print(f"Torch CUDA version: {torch.version.cuda}")

from stable_diffusion_tf.stable_diffusion import StableDiffusion as StableDiffusionPy
import gradio as gr
from tensorflow import keras
from PIL import Image

from spectro import wav_bytes_from_spectrogram_image

# Run the Keras Stable Diffusion model in mixed float16 for faster GPU inference.
keras.mixed_precision.set_global_policy("mixed_float16")

resolution = 512
sd_dreambooth_model_1 = StableDiffusionPy(resolution, resolution, download_weights=False, jit_compile=True)

# Load the base Riffusion checkpoint (PyTorch format), then swap in the
# DreamBooth fine-tuned diffusion model weights for the currulao concept.
sd_dreambooth_model_1.load_weights_from_pytorch_ckpt("riffusion-model-v1.ckpt")
sd_dreambooth_model_1.diffusion_model.load_weights("dreambooth_riffusion_model_currulao_v1/")

def generate_images(prompt: str, num_steps: int, unconditional_guidance_scale: float, temperature: float):
    # Generate a single spectrogram image from the text prompt.
    img = sd_dreambooth_model_1.generate(
        prompt,
        num_steps=num_steps,
        unconditional_guidance_scale=unconditional_guidance_scale,
        temperature=temperature,
        batch_size=1,
    )

    # Save the spectrogram, convert it back into audio, and render a waveform
    # video with the spectrogram as the background image.
    pil_img = Image.fromarray(img[0])
    pil_img.save("img.png")

    wav = wav_bytes_from_spectrogram_image(pil_img)
    with open("output.wav", "wb") as f:
        f.write(wav[0].getbuffer())

    final_video = gr.make_waveform("output.wav", bg_image="img.png")
    return final_video
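
# Optional smoke test (not part of the original app; the SMOKE_TEST environment
# variable is an assumed convention for local debugging). It generates one clip
# directly, without the UI, to confirm the weights and the spectrogram-to-audio
# path work end to end.
import os

if os.getenv("SMOKE_TEST") == "1":
    test_video = generate_images(
        "a $currulao song",
        num_steps=50,
        unconditional_guidance_scale=7.5,
        temperature=1.0,
    )
    print(f"Smoke-test waveform video written to: {test_video}")
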
gr.Interface(
    title="Keras Dreambooth Riffusion-Currulao",
    description="""This SD model has been fine-tuned from Riffusion to generate spectrograms of [Currulao](https://en.wikipedia.org/wiki/Music_of_Colombia#Currulao) music. Currulao is a traditional Afro-Colombian music and dance genre characterized by rhythmic beats, call-and-response singing, and lively percussion. It holds significant cultural and social importance in Colombia, particularly along the Pacific coast, as a celebration of African heritage and community identity.

To generate the concept, use the phrase 'a $currulao song' in your prompt.
""",
    fn=generate_images,
    inputs=[
        gr.Textbox(label="Prompt", value="a $currulao song, lo-fi"),
gr.Slider(label="Inference steps", value=50), |
|
gr.Slider(label="Guidance scale", value=7.5, maximum=15, minimum=0, step=0.5), |
|
gr.Slider(label='Temperature', value=1, maximum=1.5, minimum=0, step=0.1), |
|
], |
|
outputs=[ |
|
gr.Video(), |
|
], |
|
examples=[["a $currulao song", 50, 7.5, 1], |
|
["a $currulao song, lo-fi, nostalgic", 100, 9.5, 0.7]], |
|
).queue().launch(debug=True) |