Spaces:
Sleeping
Sleeping
File size: 5,688 Bytes
d076b8a ebb57ae d076b8a ebb57ae 00093e0 ebb57ae f412cfa 9faab60 adf8868 d076b8a ebb57ae f412cfa 73ba865 f412cfa 73ba865 f412cfa 73ba865 f412cfa 73ba865 f412cfa 73ba865 f412cfa 73ba865 8cdbc50 9faab60 adf8868 8cdbc50 9faab60 7ce9286 73ba865 ebb57ae adf8868 8cdbc50 ebb57ae 7ce9286 ebb57ae f412cfa ebb57ae adf8868 8cdbc50 9faab60 ebb57ae adf8868 8cdbc50 ebb57ae 7ce9286 ebb57ae 73ba865 7ce9286 73ba865 7ce9286 73ba865 7ce9286 73ba865 7ce9286 73ba865 ebb57ae 73ba865 ebb57ae 7ce9286 73ba865 7ce9286 ebb57ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import gradio as gr
from keras.models import load_model
from tensorflow.keras.utils import img_to_array
from numpy import expand_dims
from PIL import Image
import librosa
import numpy as np
import soundfile as sf
import os
import random
import tempfile
import matplotlib.pyplot as plt
import time # To generate unique filenames
# Load your Pix2Pix model (make sure the path is correct)
# Weights file is looked up relative to the working directory. compile=False
# is passed because the model is only used for prediction further down.
_MODEL_PATH = './model_022600.h5'
model = load_model(_MODEL_PATH, compile=False)
# Function to shift frequencies
def shift_frequencies(spectrogram, shift):
    """Roll the rows of *spectrogram* by *shift* positions along axis 0.

    The roll is circular: rows pushed past one edge re-enter at the opposite
    edge, so the result always has the same shape as the input. A positive
    *shift* moves rows toward higher indices, a negative one toward lower.
    """
    rolled = np.roll(spectrogram, shift, axis=0)
    return rolled
# Function to apply a frequency filter
def apply_filter(spectrogram, low_cut, high_cut):
    """Return a band-limited copy of *spectrogram*.

    Rows selected by ``[:low_cut]`` and by ``[high_cut:]`` are set to zero
    (they are removed outright, not merely scaled down); every other row is
    left as it was. The input array itself is never modified.
    """
    band_limited = np.array(spectrogram, copy=True)
    band_limited[high_cut:, :] = 0  # Drop everything from high_cut upward
    band_limited[:low_cut, :] = 0  # Drop everything below low_cut
    return band_limited
# Function to add harmonics
def add_harmonics(spectrogram, harmonic_shift):
    """Overlay a half-strength, row-shifted copy of *spectrogram* on itself.

    The copy is rolled circularly by *harmonic_shift* rows along axis 0 and
    scaled by 0.5 before being added; the sum is clipped to the range [0, 1].
    """
    weakened_copy = np.multiply(np.roll(spectrogram, harmonic_shift, axis=0), 0.5)
    combined = spectrogram + weakened_copy
    return np.clip(combined, 0, 1)
# Function to modulate the amplitude
def modulate_amplitude(spectrogram, factor):
    """Scale *spectrogram* by *factor* and clip the result to [0, 1].

    Factors above 1 brighten the bright areas (saturating at 1); factors
    below 1 dim them.
    """
    scaled = spectrogram * factor
    return np.clip(scaled, 0, 1)
# Function to randomly apply transformations
def modify_spectrogram(spectrogram):
    """Apply a random subset of the four spectrogram transformations.

    Each stage (row shift, band filter, added harmonics, amplitude scaling)
    is switched on or off by an independent coin flip, and the enabled stages
    run in that fixed order with randomly drawn parameters. When no stage is
    enabled the input is returned unchanged.
    """
    # All four coin flips are drawn first, before any stage parameter.
    use_shift, use_filter, use_harmonics, use_gain = [
        random.choice([True, False]) for _ in range(4)
    ]

    result = spectrogram
    if use_shift:
        result = shift_frequencies(result, shift=random.randint(-15, 15))
    if use_filter:
        lower_row = random.randint(10, 50)
        upper_row = random.randint(300, 600)
        result = apply_filter(result, low_cut=lower_row, high_cut=upper_row)
    if use_harmonics:
        result = add_harmonics(result, harmonic_shift=random.randint(2, 10))
    if use_gain:
        result = modulate_amplitude(result, factor=random.uniform(0.8, 2.0))
    return result
# Save the modified spectrogram image for display
def save_spectrogram_image(spectrogram, name):
    """Render *spectrogram* as a grayscale PNG and return the file's path.

    The image is drawn without axes, with row 0 at the bottom
    (``origin='lower'``), and written to ``"<name>_spectrogram.png"``.

    Args:
        spectrogram: 2-D array-like accepted by ``imshow``.
        name: Prefix used to build the output file name.

    Returns:
        The path of the PNG that was written.
    """
    # Save the spectrogram image using the unique name
    temp_image_path = f"{name}_spectrogram.png"
    # Work on an explicit figure handle rather than pyplot's implicit
    # "current figure", so exactly this figure is saved and closed.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(111)
        ax.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
        ax.axis('off')
        fig.savefig(temp_image_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Release the figure even when drawing or saving raises; otherwise
        # every failed request would leave a figure open in pyplot.
        plt.close(fig)
    return temp_image_path
# Save the uploaded image with the same timestamp
def save_uploaded_image(input_image, name):
    """Write *input_image* to ``"<name>_uploaded_image.png"`` and return that path.

    *input_image* only needs a ``save(path)`` method; *name* is the shared
    prefix that ties this file to the other outputs of the same request.
    """
    destination = f"{name}_uploaded_image.png"
    input_image.save(destination)
    return destination
# Process the input image and convert to audio
def process_image(input_image):
    """Turn an uploaded image into a spectrogram picture and an audio file.

    Pipeline:
      1. Save the upload under a per-request name.
      2. Resize it to 256x256, scale pixels to [-1, 1] and run the model.
      3. Rescale the model output to [0, 1], convert it to a single-channel
         image of size 1293x512 (width x height) and read it back as an array.
      4. Randomly modify the spectrogram, render it to a PNG, and invert it to
         audio with librosa (44.1 kHz, n_fft=2048, hop_length=512).

    Args:
        input_image: A PIL image (``resize`` and ``save`` are called on it).

    Returns:
        A tuple ``(uploaded_image_path, spectrogram_image_path,
        audio_file_path)`` of the three files written for this request.
    """
    import uuid  # Local import: only needed to build the per-request name

    def load_image(image, size=(256, 256)):
        """Resize *image* and convert it to a batched array in [-1, 1]."""
        image = image.resize(size)
        pixels = img_to_array(image)
        pixels = (pixels - 127.5) / 127.5
        pixels = expand_dims(pixels, 0)
        return pixels

    # A timestamp alone repeats for requests arriving within the same second,
    # which would make them overwrite each other's files; the random suffix
    # keeps every request's files distinct.
    image_name = f"image_{int(time.time())}_{uuid.uuid4().hex[:8]}"

    # Save the uploaded image with the unique name
    uploaded_image_path = save_uploaded_image(input_image, image_name)

    # Preprocess the input
    src_image = load_image(input_image)

    # Generate output using the Pix2Pix model
    gen_image = model.predict(src_image)
    gen_image = (gen_image + 1) / 2.0  # scale to [0, 1]

    # Resize the generated image to original spectrogram size
    orig_size = (1293, 512)
    gen_image_resized = Image.fromarray((gen_image[0] * 255).astype('uint8')).resize(orig_size).convert('F')

    # Mode 'F' stores the 8-bit pixel values as floats in 0..255. The
    # transformations below clip to [0, 1], so without rescaling almost every
    # non-black pixel would saturate to 1 as soon as one of them ran.
    img = np.array(gen_image_resized) / 255.0

    # Modify the spectrogram randomly
    img = modify_spectrogram(img)

    # Save the modified spectrogram as an image, using the unique name
    spectrogram_image_path = save_spectrogram_image(img, image_name)

    # Convert the spectrogram back to audio using librosa
    wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)

    # Save the audio file, using the unique name
    audio_file_path = f"{image_name}_generated_audio.wav"
    sf.write(audio_file_path, wav, samplerate=44100)

    # Return paths for uploaded image, spectrogram, and audio
    return uploaded_image_path, spectrogram_image_path, audio_file_path
# Gradio Interface
def gradio_process_image(input_image):
    """Gradio callback: run the pipeline and return one value per output.

    The interface built from this function declares three output components
    (file, image, audio), so exactly three paths are returned, in that order.

    Args:
        input_image: The uploaded image, passed straight to ``process_image``.

    Returns:
        ``(uploaded_image_path, spectrogram_image_path, audio_file_path)``.
    """
    uploaded_image_path, spectrogram_image_path, audio_file_path = process_image(input_image)
    # No files are deleted here: Gradio reads the returned paths only after
    # this function has returned, so removing them at this point would break
    # the outputs. Returning a cleanup callable as an extra value does not
    # help either, because nothing on the Gradio side would ever call it.
    return uploaded_image_path, spectrogram_image_path, audio_file_path
# Create the Gradio interface
# Components are built in the same order as they are wired up: the image
# input first, then the three outputs (uploaded file, spectrogram, audio).
_input_component = gr.Image(type="pil")  # Input is an image
_output_components = [
    gr.File(label="Uploaded Image"),
    gr.Image(type="filepath"),
    gr.Audio(type="filepath"),
]
interface = gr.Interface(
    fn=gradio_process_image,
    inputs=_input_component,
    outputs=_output_components,
    title="Image to Audio Generator with Spectrogram Display",
    description="Upload an image, and get an audio file generated using Pix2Pix.",
)
# Launch the interface
interface.launch()
|