Spaces:

musdfakoc
/

local_intelligence

Sleeping

App Files Files Community

musdfakoc committed on Sep 30, 2024

Commit

f412cfa

verified ·

1 Parent(s): 562e050

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -0

app.py CHANGED Viewed

@@ -8,10 +8,64 @@ import librosa
 import numpy as np
 import soundfile as sf
 import os
 # Load your Pix2Pix model (make sure the path is correct)
 model = load_model('./model_022600.h5', compile=False)
 # Function to process the input image and convert to audio
 def process_image(input_image):
     # Load and preprocess the input image
@@ -35,6 +89,9 @@ def process_image(input_image):
     # Convert the image to a numpy array (spectrogram)
     img = np.array(gen_image_resized)
     # Convert the spectrogram back to audio using librosa
     wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)

 import numpy as np
 import soundfile as sf
 import os
+import random
 # Load your Pix2Pix model (make sure the path is correct)
 model = load_model('./model_022600.h5', compile=False)
def shift_frequencies(spectrogram, shift):
    """Shift a spectrogram along axis 0 by ``shift`` rows, filling with zeros.

    Axis 0 is treated as the frequency axis. Rows pushed past either edge
    are dropped and the vacated rows are zero-filled; a plain ``np.roll``
    would instead wrap them around to the opposite edge of the spectrum.

    Args:
        spectrogram: Array-like with at least one dimension.
        shift: Number of rows to move. Positive values move content towards
            higher row indices, negative values towards lower ones.

    Returns:
        A new array with the same shape and dtype as the input. The input
        is never modified.
    """
    spectrogram = np.asarray(spectrogram)
    if shift == 0:
        return spectrogram.copy()

    shifted = np.zeros_like(spectrogram)
    n_bins = spectrogram.shape[0]
    if abs(shift) >= n_bins:
        # Everything has been pushed off the edge; nothing is left to copy.
        return shifted

    if shift > 0:
        shifted[shift:] = spectrogram[:-shift]
    else:
        shifted[:shift] = spectrogram[-shift:]
    return shifted
def apply_filter(spectrogram, low_cut, high_cut):
    """Zero every row outside the band ``[low_cut, high_cut)`` along axis 0.

    Rows with index below ``low_cut`` and rows with index at or above
    ``high_cut`` are set to 0 (a hard cut, not a gradual attenuation).
    A ``high_cut`` beyond the number of rows leaves the upper edge untouched.

    Args:
        spectrogram: Array-like with at least one dimension; axis 0 is the
            axis being filtered.
        low_cut: Index of the first row to keep. Negative values are
            treated as 0 (no low cut).
        high_cut: Index of the first row to drop at the upper end. Negative
            values are treated as 0 (everything is dropped).

    Returns:
        A filtered copy; the input is left unchanged.
    """
    filtered = np.copy(spectrogram)

    # Clamp at zero so a negative cut-off is not silently reinterpreted as
    # a "count from the end" index by Python slicing.
    low_cut = max(low_cut, 0)
    high_cut = max(high_cut, 0)

    # Index only axis 0 so 1-D inputs work as well as 2-D ones.
    filtered[:low_cut] = 0
    filtered[high_cut:] = 0
    return filtered
def add_harmonics(spectrogram, harmonic_shift):
    """Overlay a half-strength, row-shifted copy of the spectrogram on itself.

    The copy is shifted along axis 0 by ``harmonic_shift`` rows with zero
    fill, scaled by 0.5, added to the original, and the sum is clipped to
    ``[0, 1]``. Zero fill keeps content from wrapping around to the opposite
    edge of the spectrum, which a plain ``np.roll`` would do.

    NOTE(review): the clip to [0, 1] presumes the values are normalised to
    that range; confirm against the caller.

    Args:
        spectrogram: Array-like with at least one dimension.
        harmonic_shift: Row offset of the overlaid copy; positive values
            move it towards higher row indices.

    Returns:
        A new array of the same shape with values in ``[0, 1]``.
    """
    spectrogram = np.asarray(spectrogram)
    weakened = spectrogram * 0.5  # the overlay is half as strong as the source
    n_bins = spectrogram.shape[0]

    if harmonic_shift == 0:
        harmonics = weakened
    else:
        # Start from zeros so rows vacated by the shift contribute nothing.
        harmonics = np.zeros_like(weakened)
        if 0 < harmonic_shift < n_bins:
            harmonics[harmonic_shift:] = weakened[:-harmonic_shift]
        elif -n_bins < harmonic_shift < 0:
            harmonics[:harmonic_shift] = weakened[-harmonic_shift:]

    return np.clip(spectrogram + harmonics, 0, 1)
def modulate_amplitude(spectrogram, factor):
    """Scale every value by ``factor`` and clip the result to ``[0, 1]``.

    Factors above 1 amplify and factors below 1 attenuate; anything that
    leaves the ``[0, 1]`` range after scaling is clamped to the nearest
    bound.

    NOTE(review): the clip presumes values normalised to [0, 1]; confirm
    against the caller.

    Args:
        spectrogram: Array or array-like (e.g. nested lists) of values.
        factor: Multiplicative gain.

    Returns:
        A new array with the scaled, clipped values.
    """
    # Coerce first so plain Python sequences are scaled element-wise rather
    # than failing on ``list * float``.
    scaled = np.asarray(spectrogram) * factor
    return np.clip(scaled, 0, 1)
def modify_spectrogram(spectrogram, rng=None):
    """Apply a random subset of the spectrogram transformations.

    Each of the four transformations (frequency shift, band filter, added
    harmonics, amplitude modulation) is switched on or off independently
    with equal probability. Enabled transformations run in that order, each
    with randomly drawn parameters, and each one prints the parameters it
    used.

    NOTE(review): the filter cut-offs are absolute row indices (10-50 and
    300-600) and are not scaled to the input height; confirm they suit the
    spectrogram size produced by the caller.

    Args:
        spectrogram: The spectrogram to transform.
        rng: Optional random source exposing ``choice``, ``randint`` and
            ``uniform`` (for example a seeded ``random.Random``). Defaults
            to the module-level ``random`` functions, so existing callers
            behave as before.

    Returns:
        The transformed spectrogram, or the input object itself when no
        transformation was selected.
    """
    rng = random if rng is None else rng

    # All four on/off decisions are drawn up front, before any parameter,
    # so the sequence of random draws stays stable for a seeded generator.
    apply_shift = rng.choice([True, False])
    apply_filtering = rng.choice([True, False])
    apply_harmonics = rng.choice([True, False])
    apply_amplitude_modulation = rng.choice([True, False])

    if apply_shift:
        shift_value = rng.randint(-15, 15)  # inclusive on both ends
        print(f"Applying frequency shift: {shift_value}")
        spectrogram = shift_frequencies(spectrogram, shift=shift_value)

    if apply_filtering:
        low_cut = rng.randint(10, 50)
        high_cut = rng.randint(300, 600)
        print(f"Applying filter: low_cut={low_cut}, high_cut={high_cut}")
        spectrogram = apply_filter(spectrogram, low_cut=low_cut, high_cut=high_cut)

    if apply_harmonics:
        harmonic_shift = rng.randint(2, 10)
        print(f"Applying harmonic shift: {harmonic_shift}")
        spectrogram = add_harmonics(spectrogram, harmonic_shift=harmonic_shift)

    if apply_amplitude_modulation:
        factor = rng.uniform(0.8, 2.0)
        print(f"Applying amplitude modulation: factor={factor}")
        spectrogram = modulate_amplitude(spectrogram, factor=factor)

    return spectrogram
 # Function to process the input image and convert to audio
 def process_image(input_image):
     # Load and preprocess the input image
     # Convert the image to a numpy array (spectrogram)
     img = np.array(gen_image_resized)
+    # Modify the spectrogram randomly
+    img = modify_spectrogram(img)
     # Convert the spectrogram back to audio using librosa
     wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)