<a href="https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/test_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
    # are we running on Google Colab?
    import google.colab
    !git clone -q https://github.com/teticio/audio-diffusion.git
    %cd audio-diffusion
    !pip install -q -r requirements.txt
except:
    pass

In [None]:
import os
import sys

# Put the parent of the current working directory at the front of the import
# path. Presumably this lets the local `audiodiffusion` package resolve when
# the notebook is run from the repository's notebooks folder (confirm).
sys.path.insert(0, os.path.abspath(os.pardir))

In [None]:
import torch
import random
import numpy as np
from datasets import load_dataset
from IPython.display import Audio
from audiodiffusion.mel import Mel
from audiodiffusion import AudioDiffusion

### Select model

In [None]:
# Colab form: pick which pretrained model to use. The dropdown options must be
# the full ids listed in the descriptions below.
#@markdown teticio/audio-diffusion-256                     - trained on my Spotify "liked" playlist

#@markdown teticio/audio-diffusion-breaks-256              - trained on samples used in music

#@markdown teticio/audio-diffusion-instrumental-hiphop-256 - trained on instrumental hiphop

model_id = "teticio/audio-diffusion-256"  #@param ["teticio/audio-diffusion-256", "teticio/audio-diffusion-breaks-256", "teticio/audio-diffusion-instrumental-hiphop-256"]

### Run model inference to generate mel spectrogram, audios and loops

In [None]:
# Build the AudioDiffusion wrapper for the model chosen in the form above.
# NOTE(review): presumably this fetches pretrained weights for `model_id` - confirm.
audio_diffusion = AudioDiffusion(model_id=model_id)

In [None]:
# Generate ten samples. `generator.seed()` reseeds the generator on every pass
# and the printed value can be pasted into the seed form further down to
# regenerate a particular sample.
generator = torch.Generator()
for _ in range(10):
    print(f'Seed = {generator.seed()}')
    image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio(generator)
    # Show the spectrogram image followed by a player for the generated audio.
    display(image, Audio(audio, rate=sample_rate))
    loop = AudioDiffusion.loop_it(audio, sample_rate)
    if loop is None:
        # loop_it returned nothing, so there is no loop to play for this sample.
        print("Unable to determine loop points")
        continue
    display(Audio(loop, rate=sample_rate))

### Generate variations of audios

Try playing around with `start_step`. Values closer to zero will produce new samples, while values closer to `steps` will produce samples more faithful to the original. You can also try generating variations of a `slice` of an `audio_file` instead of passing in a `raw_audio`.

In [None]:
# Regenerate one specific sample from a known seed (for example one printed by
# the generation loop above). The resulting `audio` is the starting point for
# the variations generated in the next cell.
seed = 16183389798189209330  #@param {type:"integer"}
# Plain generation is used here: no input audio exists yet, so the
# `*_from_audio` variant would have nothing to start from.
image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio(
    torch.Generator().manual_seed(seed))
display(image)
display(Audio(audio, rate=sample_rate))

In [None]:
def _loop_or_clip(clip, rate):
    """Return a single loop of `clip`, or `clip` itself if no loop is found.

    `AudioDiffusion.loop_it` can return None when it cannot determine loop
    points; falling back to the unlooped clip keeps the concatenation below
    from failing on a None entry.
    """
    looped = AudioDiffusion.loop_it(clip, rate, loops=1)
    return clip if looped is None else looped


# Build one long track: the original sample followed by each of its variations.
# Segments are collected in a list and joined once at the end rather than
# re-concatenating the growing array on every iteration.
segments = [_loop_or_clip(audio, sample_rate)]
for _ in range(12):
    image2, (
        sample_rate, audio2
    ) = audio_diffusion.generate_spectrogram_and_audio_from_audio(
        raw_audio=audio,
        slice=0,
        start_step=0,
        steps=1000)
    display(image2)
    display(Audio(audio2, rate=sample_rate))
    segments.append(_loop_or_clip(audio2, sample_rate))
loop = np.concatenate(segments)
display(Audio(loop, rate=sample_rate))

### Compare results with random sample from training set

In [None]:
# Mel helper used below to turn a spectrogram image back into audio.
# NOTE(review): 256x256 presumably matches the `-256` models listed above -
# confirm before using a model with another resolution.
mel = Mel(x_res=256, y_res=256)

In [None]:
# Load the dataset identified by the same id as the selected model.
# NOTE(review): assumes a dataset is published under the same name as the
# model - confirm for each entry in the model form.
ds = load_dataset(model_id)

In [None]:
# Pick one random example from the training split and show its spectrogram image.
train_split = ds['train']
image = random.choice(train_split)['image']
image

In [None]:
# Convert the selected training spectrogram back to audio and render a player
# for it, using the sample rate reported by the Mel helper.
audio = mel.image_to_audio(image)
Audio(audio, rate=mel.get_sample_rate())