musdfakoc committed on
Commit
8cdbc50
·
verified ·
1 Parent(s): 73ba865

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -61,21 +61,23 @@ def modify_spectrogram(spectrogram):
61
  return spectrogram
62
 
63
  # Save the modified spectrogram image for display
64
- def save_spectrogram_image(spectrogram):
65
  plt.figure(figsize=(10, 4))
66
  plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
67
  plt.axis('off')
68
 
69
- # Save to a temporary file
70
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
71
- plt.savefig(temp_image_file.name, bbox_inches='tight', pad_inches=0)
72
- temp_image_path = temp_image_file.name
73
 
74
  plt.close()
75
  return temp_image_path
76
 
77
  # Process the input image and convert to audio
78
  def process_image(input_image):
 
 
 
79
  def load_image(image, size=(256, 256)):
80
  image = image.resize(size)
81
  pixels = img_to_array(image)
@@ -100,16 +102,15 @@ def process_image(input_image):
100
  # Modify the spectrogram randomly
101
  img = modify_spectrogram(img)
102
 
103
- # Save the modified spectrogram as an image
104
- spectrogram_image_path = save_spectrogram_image(img)
105
 
106
  # Convert the spectrogram back to audio using librosa
107
  wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
108
 
109
- # Save the audio file to a temporary location
110
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
111
- sf.write(temp_audio_file.name, wav, samplerate=44100)
112
- audio_file_path = temp_audio_file.name
113
 
114
  return spectrogram_image_path, audio_file_path # Return the paths for both spectrogram image and audio
115
 
@@ -131,7 +132,7 @@ interface = gr.Interface(
131
  inputs=gr.Image(type="pil"), # Input is an image
132
  outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")], # Output both spectrogram image and audio file
133
  title="Image to Audio Generator with Spectrogram Display",
134
- description="Upload an image (preferably a spectrogram), and get an audio file generated using Pix2Pix. You can also see the modified spectrogram.",
135
  )
136
 
137
  # Launch the interface
 
61
  return spectrogram
62
 
63
  # Save the modified spectrogram image for display
64
+ def save_spectrogram_image(spectrogram, name):
65
  plt.figure(figsize=(10, 4))
66
  plt.imshow(spectrogram, aspect='auto', origin='lower', cmap='gray')
67
  plt.axis('off')
68
 
69
+ # Create a spectrogram filename based on the uploaded image's name
70
+ temp_image_path = f"{name}_spectrogram.png"
71
+ plt.savefig(temp_image_path, bbox_inches='tight', pad_inches=0)
 
72
 
73
  plt.close()
74
  return temp_image_path
75
 
76
  # Process the input image and convert to audio
77
  def process_image(input_image):
78
+ # Extract the base name from the uploaded image filename (without extension)
79
+ image_name = os.path.splitext(os.path.basename(input_image.name))[0]
80
+
81
  def load_image(image, size=(256, 256)):
82
  image = image.resize(size)
83
  pixels = img_to_array(image)
 
102
  # Modify the spectrogram randomly
103
  img = modify_spectrogram(img)
104
 
105
+ # Save the modified spectrogram as an image, using the same name as the uploaded file
106
+ spectrogram_image_path = save_spectrogram_image(img, image_name)
107
 
108
  # Convert the spectrogram back to audio using librosa
109
  wav = librosa.feature.inverse.mel_to_audio(img, sr=44100, n_fft=2048, hop_length=512)
110
 
111
+ # Save the audio file to a location, using the same name as the uploaded file
112
+ audio_file_path = f"{image_name}_generated_audio.wav"
113
+ sf.write(audio_file_path, wav, samplerate=44100)
 
114
 
115
  return spectrogram_image_path, audio_file_path # Return the paths for both spectrogram image and audio
116
 
 
132
  inputs=gr.Image(type="pil"), # Input is an image
133
  outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")], # Output both spectrogram image and audio file
134
  title="Image to Audio Generator with Spectrogram Display",
135
+ description="Upload an image, and get an audio file generated using Pix2Pix.",
136
  )
137
 
138
  # Launch the interface