musdfakoc committed on
Commit
9faab60
·
verified ·
1 Parent(s): 50795ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -9,7 +9,8 @@ import numpy as np
9
  import soundfile as sf
10
  import os
11
  import random
12
- import tempfile # For temporary file handling
 
13
 
14
  # Load your Pix2Pix model (make sure the path is correct)
15
  model = load_model('./model_022600.h5', compile=False)
@@ -66,6 +67,20 @@ def modify_spectrogram(spectrogram):
66
 
67
  return spectrogram
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # Function to process the input image and convert to audio
70
  def process_image(input_image):
71
  # Load and preprocess the input image
@@ -93,6 +108,9 @@ def process_image(input_image):
93
  # Modify the spectrogram randomly
94
  img = modify_spectrogram(img)
95
 
 
 
 
96
  # Convert the spectrogram back to audio using librosa
97
  wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
98
 
@@ -101,15 +119,15 @@ def process_image(input_image):
101
  sf.write(temp_audio_file.name, wav, samplerate=44100)
102
  audio_file_path = temp_audio_file.name
103
 
104
- return audio_file_path # Return the file path
105
 
106
  # Create a Gradio interface
107
  interface = gr.Interface(
108
  fn=process_image,
109
  inputs=gr.Image(type="pil"), # Input is an image
110
- outputs=gr.Audio(type="filepath"), # Output is an audio file
111
- title="Image to Audio Generator", # App title
112
- description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix.",
113
  )
114
 
115
  # Launch the interface
 
9
  import soundfile as sf
10
  import os
11
  import random
12
+ import tempfile
13
+ import matplotlib.pyplot as plt
14
 
15
  # Load your Pix2Pix model (make sure the path is correct)
16
  model = load_model('./model_022600.h5', compile=False)
 
67
 
68
  return spectrogram
69
 
70
+ # Function to save the modified spectrogram image for display
def save_spectrogram_image(spectrogram):
    """Render a spectrogram to a grayscale PNG and return the file's path.

    Args:
        spectrogram: Array accepted by ``imshow``; drawn with the first row
            at the bottom (``origin='lower'``) and with the axes hidden.

    Returns:
        Path of a temporary ``.png`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    # Reserve the path and close the handle *before* matplotlib writes to it:
    # a NamedTemporaryFile that is still open cannot be reopened by name on
    # every platform (notably Windows).
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        temp_image_path = temp_image_file.name

    # Work on an explicit figure/axes instead of pyplot's implicit "current
    # figure", and always release the figure, even if rendering fails.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
        ax.axis('off')
        fig.savefig(temp_image_path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

    return temp_image_path
83
+
84
  # Function to process the input image and convert to audio
85
  def process_image(input_image):
86
  # Load and preprocess the input image
 
108
  # Modify the spectrogram randomly
109
  img = modify_spectrogram(img)
110
 
111
+ # Save the modified spectrogram as an image
112
+ spectrogram_image_path = save_spectrogram_image(img)
113
+
114
  # Convert the spectrogram back to audio using librosa
115
  wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
116
 
 
119
  sf.write(temp_audio_file.name, wav, samplerate=44100)
120
  audio_file_path = temp_audio_file.name
121
 
122
+ return spectrogram_image_path, audio_file_path # Return the paths for both spectrogram image and audio
123
 
124
  # Create a Gradio interface
# Gradio's Image component accepts type "numpy", "pil" or "filepath"; "file"
# is not a valid value, so the spectrogram output uses "filepath" (which also
# matches the path string returned by process_image).
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),  # Input is an image
    outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],  # Output both spectrogram image and audio file
    title="Image to Audio Generator with Spectrogram Display",  # App title
    description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
)
132
 
133
  # Launch the interface