Spaces:

musdfakoc
/

local_intelligence

Sleeping

App Files Files Community

musdfakoc committed on Oct 17, 2024

Commit

8cdbc50

verified ·

1 Parent(s): 73ba865

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -12

app.py CHANGED Viewed

@@ -61,21 +61,23 @@ def modify_spectrogram(spectrogram):
     return spectrogram
 # Save the modified spectrogram image for display
-def save_spectrogram_image(spectrogram):
     plt.figure(figsize=(10, 4))
     plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
     plt.axis('off')
-    # Save to a temporary file
-    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
-        plt.savefig(temp_image_file.name, bbox_inches='tight', pad_inches=0)
-        temp_image_path = temp_image_file.name
     plt.close()
     return temp_image_path
 # Process the input image and convert to audio
 def process_image(input_image):
     def load_image(image, size=(256, 256)):
         image = image.resize(size)
         pixels = img_to_array(image)
@@ -100,16 +102,15 @@ def process_image(input_image):
     # Modify the spectrogram randomly
     img = modify_spectrogram(img)
-    # Save the modified spectrogram as an image
-    spectrogram_image_path = save_spectrogram_image(img)
     # Convert the spectrogram back to audio using librosa
     wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
-    # Save the audio file to a temporary location
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
-        sf.write(temp_audio_file.name, wav, samplerate=44100)
-        audio_file_path = temp_audio_file.name
     return spectrogram_image_path, audio_file_path  # Return the paths for both spectrogram image and audio
@@ -131,7 +132,7 @@ interface = gr.Interface(
     inputs=gr.Image(type="pil"),  # Input is an image
     outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],  # Output both spectrogram image and audio file
     title="Image to Audio Generator with Spectrogram Display",
-    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
 )
 # Launch the interface

     return spectrogram
 # Save the modified spectrogram image for display
+def save_spectrogram_image(spectrogram, name):
     plt.figure(figsize=(10, 4))
     plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
     plt.axis('off')
+    # Create a spectrogram filename based on the uploaded image's name
+    temp_image_path = f"{name}_spectrogram.png"
+    plt.savefig(temp_image_path, bbox_inches='tight', pad_inches=0)
     plt.close()
     return temp_image_path
 # Process the input image and convert to audio
 def process_image(input_image):
+    # Extract the base name from the uploaded image filename (without extension)
+    image_name = os.path.splitext(os.path.basename(input_image.name))[0]
     def load_image(image, size=(256, 256)):
         image = image.resize(size)
         pixels = img_to_array(image)
     # Modify the spectrogram randomly
     img = modify_spectrogram(img)
+    # Save the modified spectrogram as an image, using the same name as the uploaded file
+    spectrogram_image_path = save_spectrogram_image(img, image_name)
     # Convert the spectrogram back to audio using librosa
     wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
+    # Save the audio file to a location, using the same name as the uploaded file
+    audio_file_path = f"{image_name}_generated_audio.wav"
+    sf.write(audio_file_path, wav, samplerate=44100)
     return spectrogram_image_path, audio_file_path  # Return the paths for both spectrogram image and audio
     inputs=gr.Image(type="pil"),  # Input is an image
     outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],  # Output both spectrogram image and audio file
     title="Image to Audio Generator with Spectrogram Display",
+    description="Upload an image, and get an audio file generated using Pix2Pix.",
 )
 # Launch the interface