Spaces:
Runtime error
Runtime error
File size: 7,419 Bytes
6b7f89f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
import gradio as gr
import matplotlib.pyplot as plt
import librosa
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import *
from moviepy.video.io.VideoFileClip import VideoFileClip
def make_bars_image(height_values, index, new_height):
    """Draw one frame of white bars on a transparent canvas and save it as PNG.

    The bars are laid out left to right, centred horizontally on a
    1024-pixel-wide canvas, and are drawn downwards starting 160 px below the
    top edge. The canvas is then rotated by 180 degrees and mirrored
    left/right, which together amount to a vertical flip: in the saved image
    the bars rise from a baseline 160 px above the bottom edge.

    Args:
        height_values: Sequence of bar heights in pixels, one per bar.
        index: Frame number, used only to build the output file name.
        new_height: Height of the canvas in pixels.

    Returns:
        The path of the written file, ``'audio_bars_<index>.png'``, relative
        to the current working directory.
    """
    canvas_width = 1024
    bar_width = 4
    gap = 4
    # Distance between the canvas edge and the bar baseline.
    baseline_offset = int(80 * 2)

    # Fully transparent RGBA canvas so it can later be used as its own mask.
    canvas = Image.new('RGBA', (canvas_width, new_height), color=(0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)

    # Centre the whole row of bars (bars plus the gaps between them).
    bar_count = len(height_values)
    row_width = bar_count * bar_width + (bar_count - 1) * gap
    left_edge = int((canvas_width - row_width) / 2)
    stride = bar_width + gap

    for position, bar_height in enumerate(height_values):
        left = left_edge + position * stride
        pen.rectangle(
            [left, baseline_offset, left + bar_width, bar_height + baseline_offset],
            fill='white',
        )

    # Rotate by 180 degrees, then mirror horizontally.
    canvas = canvas.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)

    out_path = 'audio_bars_' + str(index) + '.png'
    canvas.save(out_path)
    return out_path
def db_to_height(db_value, floor_db=-80, max_height=50):
    """Map a decibel value linearly onto a bar height.

    ``floor_db`` maps to a height of 0 and 0 dB maps to ``max_height``. The
    mapping is not clamped: values below ``floor_db`` give negative heights
    and values above 0 dB give heights larger than ``max_height``.

    Args:
        db_value: Level in dB (a number, or anything supporting the same
            arithmetic such as a NumPy scalar or array).
        floor_db: The dB level that corresponds to a zero-height bar. Must be
            negative. Defaults to -80.
        max_height: Height returned for 0 dB. Defaults to 50.

    Returns:
        The height, ``(db_value - floor_db) / -floor_db * max_height``.

    Raises:
        ValueError: If ``floor_db`` is not negative.
    """
    if floor_db >= 0:
        raise ValueError("floor_db must be negative")
    # Normalise to 0..1 across [floor_db, 0], then stretch to the bar height.
    return (db_value - floor_db) / -floor_db * max_height
def _load_bar_heights(audio_path, n_freqs=114, hop_length=1024):
    """Analyse the audio and return per-frame bar heights plus plot inputs.

    Returns a tuple ``(heights, spectrogram, sr, duration)`` where ``heights``
    is a list with one list of bar heights per STFT frame.
    """
    audio_data, sr = librosa.load(audio_path)
    duration = librosa.get_duration(y=audio_data, sr=sr)
    if duration <= 0:
        # The frame rate below is frames / duration; fail early and clearly.
        raise ValueError("The audio file contains no samples.")
    audio_data = audio_data[:int(duration * sr)]

    stft = librosa.stft(audio_data, hop_length=hop_length)
    spectrogram = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    # NOTE(review): stft.shape[0] is the number of frequency bins
    # (1 + n_fft / 2), not the FFT size, so `freqs` has roughly half as many
    # entries as the spectrogram has rows. As a result only the lower part of
    # the spectrum is sampled and the printed frequencies are mislabelled.
    # Kept as is so the rendered bars stay identical; confirm intent.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=stft.shape[0])
    freq_indices = np.linspace(0, len(freqs) - 1, n_freqs, dtype=int)

    db_values = [
        list(zip(freqs[freq_indices], spectrogram[freq_indices, i]))
        for i in range(spectrogram.shape[1])
    ]
    print(db_values[0])

    heights = [[db_to_height(db) for _, db in frame] for frame in db_values]
    print(heights[0])
    print("AUDIO CHUNK: " + str(len(heights)))
    return heights, spectrogram, sr, duration


def _build_background(image_in, title, width=1024):
    """Write 'resized_background.jpg' (image + dark band + title); return its height."""
    with Image.open(image_in) as source_image:
        src_width, src_height = source_image.size
        new_height = int(width / (src_width / src_height))
        # H.264 with 4:2:0 chroma subsampling needs even frame dimensions.
        new_height = max(2, new_height - new_height % 2)
        # JPEG cannot store alpha or palette images, so normalise to RGB.
        background = source_image.convert('RGB').resize((width, new_height))

    # Dark overlay along the bottom so the white title stays readable.
    with Image.open('black_cache.png') as cache_source:
        overlay = cache_source.resize((width, cache_source.size[1] * 2), Image.LANCZOS)
    background.paste(overlay, (0, background.height - overlay.height), mask=overlay)

    draw = ImageDraw.Draw(background)
    font = ImageFont.truetype("Lato-Regular.ttf", 16)
    draw.text((int(30 * 2), new_height - (70 * 2)), title, fill=(255, 255, 255), font=font)

    background.save('resized_background.jpg')
    return new_height


def _render_frames(heights, new_height):
    """Composite every bar image over the saved background; return the frame paths."""
    # Decode the background once instead of re-opening the JPEG per frame.
    with Image.open('resized_background.jpg') as saved_background:
        base_frame = saved_background.copy()

    generated_frames = []
    for i, frame in enumerate(heights):
        bars_path = make_bars_image(frame, i, new_height)
        composed = base_frame.copy()
        with Image.open(bars_path) as bars_img:
            composed.paste(bars_img, (0, 0), mask=bars_img)
        frame_path = 'audio_bars_with_bg' + str(i) + '.jpg'
        composed.save(frame_path)
        generated_frames.append(frame_path)
    return generated_frames


def _encode_video(generated_frames, audio_in, duration, output_video_path, new_fps=25):
    """Assemble the frames and audio into 'my_video.mp4', then re-time it to new_fps."""
    codec = 'libx264'
    audio_codec = 'aac'

    # One image per STFT frame, paced so the sequence spans the whole audio.
    clip = ImageSequenceClip(generated_frames, fps=len(generated_frames) / duration)
    audio_clip = AudioFileClip(audio_in)
    try:
        clip = clip.set_audio(audio_clip)
        clip.write_videofile("my_video.mp4", codec=codec, audio_codec=audio_codec)
    finally:
        audio_clip.close()

    retimed_clip = VideoFileClip("my_video.mp4")
    try:
        new_clip = retimed_clip.set_fps(new_fps)
        new_clip.write_videofile(output_video_path, codec=codec, audio_codec=audio_codec)
    finally:
        retimed_clip.close()


def _save_spectrogram(spectrogram, sr, hop_length, output_path='image_out.jpg'):
    """Plot the dB spectrogram and save it to output_path."""
    # Not every librosa release exposes `display` after a bare `import librosa`.
    import librosa.display

    fig = plt.figure(figsize=(10, 4))
    try:
        # hop_length must match the STFT, otherwise the time axis is mis-scaled.
        librosa.display.specshow(
            spectrogram, sr=sr, hop_length=hop_length, x_axis='time', y_axis='log'
        )
        plt.colorbar(format='%+2.0f dB')
        plt.title('Audio Bars Visualization')
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return output_path


def infer(title, audio_in, image_in, output_video_path):
    """Render an animated audio-bar video over a titled background image.

    The audio is converted to a dB spectrogram; for every STFT frame a row of
    bars is drawn and composited over the resized background, and the frames
    are encoded together with the audio into a 25 fps video. A spectrogram
    plot is saved as well.

    Besides the returned files, this writes 'resized_background.jpg',
    'my_video.mp4', 'audio_bars_<i>.png' and 'audio_bars_with_bg<i>.jpg' to
    the current working directory, and it reads 'black_cache.png' and
    'Lato-Regular.ttf' from there. Because these names are fixed, concurrent
    calls would overwrite each other's files.

    Args:
        title: Text drawn in white near the bottom-left of the background.
        audio_in: Path of the audio file to analyse and use as soundtrack.
        image_in: Path of the background image.
        output_video_path: Path the final video is written to.

    Returns:
        A tuple ``(output_video_path, 'image_out.jpg')``.

    Raises:
        ValueError: If the audio file has zero duration.
    """
    hop_length = 1024

    heights, spectrogram, sr, duration = _load_bar_heights(audio_in, hop_length=hop_length)
    new_height = _build_background(image_in, title)

    generated_frames = _render_frames(heights, new_height)
    print(generated_frames)

    _encode_video(generated_frames, audio_in, duration, output_video_path)
    _save_spectrogram(spectrogram, sr, hop_length, 'image_out.jpg')

    return output_video_path, 'image_out.jpg'
# Build the Gradio UI around `infer` and start serving it.
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(placeholder='FIND A GOOD TITLE'),
        gr.Audio(source='upload', type='filepath'),
        gr.Image(source='upload', type='filepath'),
        # Hidden field that supplies the fixed output path to `infer`.
        gr.Textbox(label="Output video path", value="my_final_video.mp4", visible=False),
    ],
    outputs=[
        gr.Video(label='video result'),
        gr.Image(label='spectrogram image'),
    ],
    title='Animated Audio Visualizer',
    description='<p style="text-align: center;">Upload an audio file, upload a background image, choose a good title, click submit.</p>',
)
demo.launch()