import gradio as gr
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import img_to_array
from numpy import expand_dims
from PIL import Image
import librosa
import numpy as np
import soundfile as sf
import random
import tempfile
import matplotlib.pyplot as plt
# Load your Pix2Pix model (make sure the path is correct)
model = load_model('./model_022600.h5', compile=False)
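# The generator maps a 256x256 input image to an image-like spectrogram;
# its output is assumed to lie in [-1, 1] (tanh), matching the rescaling in process_image below.
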
# Function to shift frequencies
def shift_frequencies(spectrogram, shift):
    return np.roll(spectrogram, shift, axis=0)

# Function to apply a frequency filter
def apply_filter(spectrogram, low_cut, high_cut):
    filtered = np.copy(spectrogram)
    filtered[:low_cut, :] = 0   # Attenuate low frequencies
    filtered[high_cut:, :] = 0  # Attenuate high frequencies
    return filtered

# Function to add harmonics
def add_harmonics(spectrogram, harmonic_shift):
    harmonics = np.roll(spectrogram, harmonic_shift, axis=0) * 0.5  # Weaken the harmonics
    return np.clip(spectrogram + harmonics, 0, 1)

# Function to modulate the amplitude
def modulate_amplitude(spectrogram, factor):
    return np.clip(spectrogram * factor, 0, 1)  # Amplify or attenuate the white areas

# Function to randomly decide which transformations to apply and with what parameters
def modify_spectrogram(spectrogram):
    # Random decision for transformations
    apply_shift = random.choice([True, False])
    apply_filtering = random.choice([True, False])
    apply_harmonics = random.choice([True, False])
    apply_amplitude_modulation = random.choice([True, False])

    # Randomly select the values for each transformation
    if apply_shift:
        shift_value = random.randint(-15, 15)  # Random shift between -15 and 15
        print(f"Applying frequency shift: {shift_value}")
        spectrogram = shift_frequencies(spectrogram, shift=shift_value)

    if apply_filtering:
        low_cut = random.randint(10, 50)     # Random low_cut between 10 and 50
        high_cut = random.randint(300, 600)  # Random high_cut between 300 and 600
        print(f"Applying filter: low_cut={low_cut}, high_cut={high_cut}")
        spectrogram = apply_filter(spectrogram, low_cut=low_cut, high_cut=high_cut)

    if apply_harmonics:
        harmonic_shift = random.randint(2, 10)  # Random harmonic shift between 2 and 10
        print(f"Applying harmonic shift: {harmonic_shift}")
        spectrogram = add_harmonics(spectrogram, harmonic_shift=harmonic_shift)

    if apply_amplitude_modulation:
        factor = random.uniform(0.8, 2.0)  # Random amplitude factor between 0.8 and 2.0
        print(f"Applying amplitude modulation: factor={factor}")
        spectrogram = modulate_amplitude(spectrogram, factor=factor)

    return spectrogram

# Function to save the modified spectrogram image for display
def save_spectrogram_image(spectrogram):
    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
    plt.axis('off')

    # Save to a temporary file
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        plt.savefig(temp_image_file.name, bbox_inches='tight', pad_inches=0)
        temp_image_path = temp_image_file.name
    plt.close()

    return temp_image_path

# Function to process the input image and convert to audio
def process_image(input_image):
    # Load and preprocess the input image: resize to 256x256 and scale pixels to [-1, 1]
    def load_image(image, size=(256, 256)):
        image = image.resize(size)
        pixels = img_to_array(image)
        pixels = (pixels - 127.5) / 127.5
        pixels = expand_dims(pixels, 0)
        return pixels

    # Preprocess the input
    src_image = load_image(input_image)

    # Generate output using the Pix2Pix model
    gen_image = model.predict(src_image)
    gen_image = (gen_image + 1) / 2.0  # Scale from [-1, 1] to [0, 1]

    # Resize the generated image to the original spectrogram size (width, height)
    orig_size = (1293, 512)
    gen_image_resized = Image.fromarray((gen_image[0] * 255).astype('uint8')).resize(orig_size).convert('F')  # 32-bit float grayscale

    # Convert the image to a numpy array (spectrogram) and normalize back to [0, 1]
    # so the clip-based transforms in modify_spectrogram behave as intended
    img = np.array(gen_image_resized) / 255.0

    # Modify the spectrogram randomly
    img = modify_spectrogram(img)

    # Save the modified spectrogram as an image
    spectrogram_image_path = save_spectrogram_image(img)

    # Convert the spectrogram back to audio using librosa (Griffin-Lim mel inversion)
    wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)

    # Save the audio file to a temporary location
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
        sf.write(temp_audio_file.name, wav, samplerate=44100)
        audio_file_path = temp_audio_file.name

    # Return the paths for both the spectrogram image and the audio file
    return spectrogram_image_path, audio_file_path

# Create a Gradio interface
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),  # Input is an image
    outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],  # Output both spectrogram image and audio file
    title="Image to Audio Generator with Spectrogram Display",  # App title
    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
)
# Launch the interface
interface.launch()
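
# Example of calling the pipeline directly, bypassing the UI (the input file name is hypothetical):
#   from PIL import Image
#   spec_path, wav_path = process_image(Image.open("input_spectrogram.png"))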