File size: 5,389 Bytes
d076b8a
ebb57ae
 
 
 
d076b8a
ebb57ae
00093e0
ebb57ae
 
f412cfa
9faab60
 
d076b8a
ebb57ae
 
 
f412cfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9faab60
 
 
 
 
 
 
 
 
 
 
 
 
 
ebb57ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f412cfa
 
 
ebb57ae
9faab60
 
 
ebb57ae
 
 
 
50795ae
 
 
ebb57ae
9faab60
ebb57ae
 
 
 
 
095807d
9faab60
 
ebb57ae
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import gradio as gr
from keras.models import load_model
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.utils import load_img
from numpy import expand_dims
from PIL import Image
import librosa
import numpy as np
import soundfile as sf
import os
import random
import tempfile
import matplotlib.pyplot as plt

# Pix2Pix generator, loaded once at import time and reused by every call to
# process_image. The path is resolved relative to the current working
# directory. compile=False because the model is only used for prediction here.
model = load_model(
    filepath='./model_022600.h5',
    compile=False,
)

# Function to shift frequencies
def shift_frequencies(spectrogram, shift):
    """Circularly shift a spectrogram along its first axis.

    Rows pushed past one edge re-enter at the opposite edge (``np.roll``
    semantics), so no values are dropped.

    Args:
        spectrogram: Array-like whose axis 0 is rolled.
        shift: Number of rows to move; a negative value rolls the other way.

    Returns:
        A new array of the same shape with its rows rotated.
    """
    rotated = np.roll(spectrogram, shift=shift, axis=0)
    return rotated

# Function to apply a frequency filter
def apply_filter(spectrogram, low_cut, high_cut):
    """Zero every row outside the ``[low_cut, high_cut)`` band.

    The work is done on a copy, so the caller's array is left untouched.

    Args:
        spectrogram: 2-D array whose axis 0 is indexed by the cut-offs.
        low_cut: Rows before this index are set to 0.
        high_cut: Rows from this index onward are set to 0.

    Returns:
        The band-limited copy of ``spectrogram``.
    """
    band_limited = np.copy(spectrogram)
    # Ordinary slice semantics apply, so a cut-off beyond the array height
    # simply selects no rows.
    stop_bands = (slice(None, low_cut), slice(high_cut, None))
    for rows in stop_bands:
        band_limited[rows, :] = 0
    return band_limited

# Function to add harmonics
def add_harmonics(spectrogram, harmonic_shift):
    """Overlay a half-strength, row-shifted copy of the spectrogram.

    Args:
        spectrogram: Array whose axis 0 is rolled to build the overlay.
        harmonic_shift: Number of rows the overlay is rolled by (circular).

    Returns:
        The sum of the input and the overlay, clipped to ``[0, 1]``.
    """
    # The shifted copy is halved so it stays weaker than the source signal.
    overtone = 0.5 * np.roll(spectrogram, harmonic_shift, axis=0)
    blended = spectrogram + overtone
    return np.clip(blended, 0, 1)

# Function to modulate the amplitude
def modulate_amplitude(spectrogram, factor):
    """Scale the spectrogram by ``factor`` and clip the result to ``[0, 1]``.

    Args:
        spectrogram: Array of values to scale.
        factor: Multiplier; above 1 amplifies, below 1 attenuates.

    Returns:
        The scaled values, limited to the ``[0, 1]`` range.
    """
    scaled = spectrogram * factor
    return np.clip(scaled, 0, 1)

# Function to randomly decide which transformations to apply and with what parameters
def modify_spectrogram(spectrogram):
    """Apply a randomly chosen subset of the spectrogram transformations.

    Four independent coin flips decide whether to apply, in this order, a
    frequency shift, a band filter, added harmonics and amplitude modulation.
    The parameters of each selected step are drawn at random too, and every
    applied step is reported on stdout.

    Args:
        spectrogram: Array passed through the selected transformations.

    Returns:
        The transformed array, or the input object itself when no
        transformation was selected.
    """
    # All four on/off decisions are drawn first, before any step parameter.
    use_shift, use_filter, use_harmonics, use_gain = (
        random.choice([True, False]) for _ in range(4)
    )

    result = spectrogram

    if use_shift:
        # Roll by up to 15 rows in either direction.
        shift_value = random.randint(-15, 15)
        print(f"Applying frequency shift: {shift_value}")
        result = shift_frequencies(result, shift=shift_value)

    if use_filter:
        # Lower cut-off in 10..50, upper cut-off in 300..600 (row indices).
        low_cut = random.randint(10, 50)
        high_cut = random.randint(300, 600)
        print(f"Applying filter: low_cut={low_cut}, high_cut={high_cut}")
        result = apply_filter(result, low_cut=low_cut, high_cut=high_cut)

    if use_harmonics:
        # Overlay offset of 2..10 rows.
        harmonic_shift = random.randint(2, 10)
        print(f"Applying harmonic shift: {harmonic_shift}")
        result = add_harmonics(result, harmonic_shift=harmonic_shift)

    if use_gain:
        # Gain between 0.8 (slight attenuation) and 2.0 (doubling).
        factor = random.uniform(0.8, 2.0)
        print(f"Applying amplitude modulation: factor={factor}")
        result = modulate_amplitude(result, factor=factor)

    return result

# Function to save the modified spectrogram image for display
def save_spectrogram_image(spectrogram):
    """Render a spectrogram as a grayscale PNG in a temporary file.

    The array is drawn without axes, with row 0 at the bottom
    (``origin='lower'``), on a 10x4 inch figure.

    Args:
        spectrogram: 2-D array to display.

    Returns:
        Path of the PNG file. It is created with ``delete=False``, so it
        outlives this call and the caller is responsible for removing it.
    """
    # Hold an explicit handle on the figure rather than relying on pyplot's
    # implicit "current figure", so exactly this figure is saved and closed.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(111)
        ax.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
        ax.axis('off')

        # Reserve a unique path, then let matplotlib write to it by name.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
            temp_image_path = temp_image_file.name
        fig.savefig(temp_image_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails;
        # pyplot otherwise keeps it registered for the life of the process.
        plt.close(fig)

    return temp_image_path

# Function to process the input image and convert to audio
def process_image(input_image):
    """Turn an input image into a spectrogram picture and an audio clip.

    Steps: resize the image to 256x256 and scale it to [-1, 1], run it through
    the Pix2Pix generator, resize the result to 1293x512, convert it to a
    single-channel float array in [0, 1], apply the random spectrogram
    modifications, render that array to a PNG, and invert it to audio with
    ``librosa.feature.inverse.mel_to_audio``.

    Args:
        input_image: PIL image supplied by the Gradio image input.

    Returns:
        Tuple ``(spectrogram_image_path, audio_file_path)`` pointing to
        temporary files that are not deleted automatically.

    Raises:
        gr.Error: If no image was provided.
    """
    # Gradio passes None when the form is submitted without an image; report
    # that to the user instead of failing with an AttributeError below.
    if input_image is None:
        raise gr.Error("Please upload an image first.")

    sample_rate = 44100  # used for both the inversion and the WAV header

    def load_image(image, size=(256, 256)):
        """Resize, scale pixels from [0, 255] to [-1, 1], add a batch axis."""
        image = image.resize(size)
        pixels = img_to_array(image)
        pixels = (pixels - 127.5) / 127.5
        pixels = expand_dims(pixels, 0)
        return pixels

    # Preprocess the input
    src_image = load_image(input_image)

    # Generate output using the Pix2Pix model
    gen_image = model.predict(src_image)
    gen_image = (gen_image + 1) / 2.0  # scale from [-1, 1] to [0, 1]

    # Resize the generated image to the original spectrogram size.
    # PIL sizes are (width, height), so the array below is 512 rows x 1293 columns.
    orig_size = (1293, 512)
    gen_image_resized = Image.fromarray((gen_image[0] * 255).astype('uint8')).resize(orig_size).convert('F')

    # convert('F') keeps pixel values on the 0-255 scale. Bring them back to
    # [0, 1]: the harmonic and amplitude transformations clip to that range,
    # and would otherwise saturate almost every pixel to 1.
    img = np.array(gen_image_resized) / 255.0

    # Modify the spectrogram randomly
    img = modify_spectrogram(img)

    # Save the modified spectrogram as an image
    spectrogram_image_path = save_spectrogram_image(img)

    # Convert the spectrogram back to audio using librosa
    wav = librosa.feature.inverse.mel_to_audio(img, sr=sample_rate, n_fft=2048, hop_length=512)

    # Reserve a temporary path, then write the audio to it by name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
        audio_file_path = temp_audio_file.name
    sf.write(audio_file_path, wav, samplerate=sample_rate)

    return spectrogram_image_path, audio_file_path  # paths for the spectrogram image and the audio

# Create a Gradio interface
# One PIL image in; two file paths out (rendered spectrogram, generated audio).
interface = gr.Interface(
    process_image,
    gr.Image(type="pil"),
    [
        gr.Image(type="filepath"),
        gr.Audio(type="filepath"),
    ],
    title="Image to Audio Generator with Spectrogram Display",
    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
)

# Start serving the app
interface.launch()