import gradio as gr
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import img_to_array
from numpy import expand_dims
from PIL import Image
import librosa
import numpy as np
import soundfile as sf
import random
import tempfile
import matplotlib.pyplot as plt
# Load your Pix2Pix model (make sure the path is correct)
model = load_model('./model_022600.h5', compile=False)
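# The generator maps a 256x256 input image to an image-like spectrogram;
# its output is assumed to lie in [-1, 1] (tanh), matching the rescaling in process_image below.
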
# Function to shift frequencies
def shift_frequencies(spectrogram, shift):
    return np.roll(spectrogram, shift, axis=0)

# Function to apply a frequency filter
def apply_filter(spectrogram, low_cut, high_cut):
    filtered = np.copy(spectrogram)
    filtered[:low_cut, :] = 0   # Attenuate low frequencies
    filtered[high_cut:, :] = 0  # Attenuate high frequencies
    return filtered

# Function to add harmonics
def add_harmonics(spectrogram, harmonic_shift):
    harmonics = np.roll(spectrogram, harmonic_shift, axis=0) * 0.5  # Weaken the harmonics
    return np.clip(spectrogram + harmonics, 0, 1)

# Function to modulate the amplitude
def modulate_amplitude(spectrogram, factor):
    return np.clip(spectrogram * factor, 0, 1)  # Amplify or attenuate the white areas

# Function to randomly decide which transformations to apply and with what parameters
def modify_spectrogram(spectrogram):
    # Random decision for transformations
    apply_shift = random.choice([True, False])
    apply_filtering = random.choice([True, False])
    apply_harmonics = random.choice([True, False])
    apply_amplitude_modulation = random.choice([True, False])

    # Randomly select the values for each transformation
    if apply_shift:
        shift_value = random.randint(-15, 15)  # Random shift between -15 and 15
        print(f"Applying frequency shift: {shift_value}")
        spectrogram = shift_frequencies(spectrogram, shift=shift_value)

    if apply_filtering:
        low_cut = random.randint(10, 50)     # Random low_cut between 10 and 50
        high_cut = random.randint(300, 600)  # Random high_cut between 300 and 600
        print(f"Applying filter: low_cut={low_cut}, high_cut={high_cut}")
        spectrogram = apply_filter(spectrogram, low_cut=low_cut, high_cut=high_cut)

    if apply_harmonics:
        harmonic_shift = random.randint(2, 10)  # Random harmonic shift between 2 and 10
        print(f"Applying harmonic shift: {harmonic_shift}")
        spectrogram = add_harmonics(spectrogram, harmonic_shift=harmonic_shift)

    if apply_amplitude_modulation:
        factor = random.uniform(0.8, 2.0)  # Random amplitude factor between 0.8 and 2.0
        print(f"Applying amplitude modulation: factor={factor}")
        spectrogram = modulate_amplitude(spectrogram, factor=factor)

    return spectrogram

# Function to save the modified spectrogram image for display
def save_spectrogram_image(spectrogram):
    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
    plt.axis('off')

    # Save to a temporary file
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        plt.savefig(temp_image_file.name, bbox_inches='tight', pad_inches=0)
        temp_image_path = temp_image_file.name
    plt.close()

    return temp_image_path

# Function to process the input image and convert to audio
def process_image(input_image):
    # Load and preprocess the input image: resize to 256x256 and scale pixels to [-1, 1]
    def load_image(image, size=(256, 256)):
        image = image.resize(size)
        pixels = img_to_array(image)
        pixels = (pixels - 127.5) / 127.5
        pixels = expand_dims(pixels, 0)
        return pixels

    # Preprocess the input
    src_image = load_image(input_image)

    # Generate output using the Pix2Pix model
    gen_image = model.predict(src_image)
    gen_image = (gen_image + 1) / 2.0  # Scale from [-1, 1] to [0, 1]

    # Resize the generated image to the original spectrogram size (width, height)
    orig_size = (1293, 512)
    gen_image_resized = Image.fromarray((gen_image[0] * 255).astype('uint8')).resize(orig_size).convert('F')  # 32-bit float grayscale

    # Convert the image to a numpy array (spectrogram) and normalize back to [0, 1]
    # so the clip-based transforms in modify_spectrogram behave as intended
    img = np.array(gen_image_resized) / 255.0

    # Modify the spectrogram randomly
    img = modify_spectrogram(img)

    # Save the modified spectrogram as an image
    spectrogram_image_path = save_spectrogram_image(img)

    # Convert the spectrogram back to audio using librosa (Griffin-Lim mel inversion)
    wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)

    # Save the audio file to a temporary location
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
        sf.write(temp_audio_file.name, wav, samplerate=44100)
        audio_file_path = temp_audio_file.name

    # Return the paths for both the spectrogram image and the audio file
    return spectrogram_image_path, audio_file_path

# Create a Gradio interface
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),  # Input is an image
    outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],  # Output both spectrogram image and audio file
    title="Image to Audio Generator with Spectrogram Display",  # App title
    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
)
# Launch the interface
interface.launch()
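
# Example of calling the pipeline directly, bypassing the UI (the input file name is hypothetical):
#   from PIL import Image
#   spec_path, wav_path = process_image(Image.open("input_spectrogram.png"))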