Spaces:
Running
Running
File size: 4,710 Bytes
7530063 4654b73 7530063 aaa6686 4654b73 7530063 2b4016e 05111e0 2b4016e 4654b73 05111e0 4654b73 05111e0 2b4016e 05111e0 2b4016e aaa6686 05111e0 2b4016e 05111e0 2b4016e 4654b73 7530063 2b4016e 05111e0 4654b73 05111e0 4654b73 aaa6686 05111e0 aaa6686 2b4016e aaa6686 05111e0 d3701bd aaa6686 05111e0 7530063 2b4016e 4654b73 05111e0 4654b73 7530063 05111e0 7530063 4654b73 05111e0 2b4016e 4654b73 7530063 aaa6686 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# app.py
# =============
# This is a complete app.py file for a Gradio application that allows users to upload an audio file and generate a video with frequency visualization.
import os
import subprocess

import cv2
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
# Function to generate frequency visualization frames from audio
def generate_frequency_visualization(audio_path, fps, num_bars):
    """Render per-frame bar-chart PNGs visualizing the audio spectrum.

    Args:
        audio_path: Path to the input audio file (any format librosa reads).
        fps: Target video frame rate; determines the STFT hop length.
        num_bars: Number of frequency bars drawn per frame.

    Returns:
        Tuple ``(frames_directory, duration_seconds)`` on success, or
        ``(None, None)`` if loading or rendering fails.
    """
    try:
        # Load the audio at its native sampling rate.
        y, sr = librosa.load(audio_path, sr=None)
        duration = librosa.get_duration(y=y, sr=sr)
        print(f"Loaded audio file with sampling rate: {sr}, and duration: {duration} seconds.")
        if sr == 0 or len(y) == 0:
            raise ValueError("Invalid audio file: sampling rate or audio data is zero.")
        # Hop length matching the desired fps. Clamp to >= 1: int(sr / fps)
        # truncates to 0 when fps > sr, which would crash the STFT.
        hop_length = max(1, int(sr / fps))
        S = np.abs(librosa.stft(y, n_fft=2048, hop_length=hop_length))
        frequencies = librosa.fft_frequencies(sr=sr)
        # Partition the frequency axis into num_bars contiguous bins.
        bins = np.linspace(0, len(frequencies), num_bars + 1, dtype=int)
        bar_heights = []
        # Aggregate mean power per bin for each STFT frame. Empty slices
        # (possible when num_bars exceeds the frequency-row count) become 0.0
        # instead of NaN from np.mean([]).
        for i in range(S.shape[1]):
            frame = S[:, i]
            bar_frame = [
                float(np.mean(frame[bins[j]:bins[j + 1]])) if bins[j + 1] > bins[j] else 0.0
                for j in range(num_bars)
            ]
            bar_heights.append(bar_frame)
        # Create (or reuse) the output directory, removing PNGs left over from
        # a previous run so they cannot leak into the newly assembled video.
        os.makedirs('frames', exist_ok=True)
        for stale in os.listdir('frames'):
            if stale.endswith('.png'):
                os.remove(os.path.join('frames', stale))
        # Render and save one bar chart per frame.
        for i, heights in enumerate(bar_heights):
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(range(num_bars), heights, color=plt.cm.viridis(np.linspace(0, 1, num_bars)))
            ax.set_ylim(0, np.max(S))
            ax.axis('off')
            plt.savefig(f'frames/frame_{i:04d}.png', bbox_inches='tight', pad_inches=0)
            plt.close(fig)  # close this figure explicitly so figures don't accumulate
        print(f"Generated {len(bar_heights)} frames for visualization.")
        return 'frames', duration
    except Exception as e:
        # Best-effort: report the problem and signal failure to the caller.
        print(f"Error generating frequency visualization: {e}")
        return None, None
# Function to create a video from the generated frames
def create_video_from_frames(frames_directory, audio_path, fps, duration):
    """Assemble the saved PNG frames into an MP4 and mux in the original audio.

    Args:
        frames_directory: Directory containing ``frame_XXXX.png`` images.
        audio_path: Path to the source audio file to mux into the video.
        fps: Playback frame rate for the output video.
        duration: Audio duration in seconds (currently unused; kept for
            interface compatibility with callers).

    Returns:
        Path to the final video with audio, or None on error.
    """
    try:
        # Collect frames in name order (frame_0000.png, frame_0001.png, ...),
        # which matches the order they were generated in.
        frame_files = sorted(
            os.path.join(frames_directory, f)
            for f in os.listdir(frames_directory)
            if f.endswith('.png')
        )
        if not frame_files:
            raise ValueError("No frames found to create the video.")
        # Derive the video dimensions from the first frame.
        first_frame = cv2.imread(frame_files[0])
        height, width, _ = first_frame.shape
        # Initialize video writer for a silent intermediate video.
        video_path = 'output_video.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
        for frame_file in frame_files:
            video_writer.write(cv2.imread(frame_file))
        video_writer.release()
        # Mux the audio track with ffmpeg. Pass an argument list (not a shell
        # string) so paths containing spaces or shell metacharacters are safe,
        # and fail loudly (CalledProcessError) if ffmpeg exits non-zero instead
        # of silently returning a path to a file that was never written.
        output_path = 'output_with_audio.mp4'
        subprocess.run(
            [
                'ffmpeg', '-i', video_path, '-i', audio_path,
                '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
                output_path, '-y',
            ],
            check=True,
        )
        print(f"Video created with {len(frame_files)} frames.")
        return output_path
    except Exception as e:
        print(f"Error creating video from frames: {e}")
        return None
# Gradio interface function
def process_audio(audio):
    """Gradio callback: turn an uploaded audio file into a visualization video."""
    # Fixed rendering settings for the visualization.
    fps = 60
    num_bars = 12
    frames_dir, duration = generate_frequency_visualization(audio, fps, num_bars)
    if not frames_dir:
        # Frame generation failed; surface "no video" to the UI.
        return None
    return create_video_from_frames(frames_dir, audio, fps, duration)
# Create the Gradio interface with explanations and recommendations
iface = gr.Interface(
    fn=process_audio,  # callback receives the uploaded file's path (type="filepath")
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=gr.Video(label="Generated Video"),
    title="Audio Frequency Visualization",
    # NOTE: adjacent string literals are implicitly concatenated into one
    # description string.
    description="Upload an audio file to generate a video with frequency visualization. "
    "Supported file types: WAV, MP3, FLAC. "
    "Recommended file duration: 10 seconds to 5 minutes. "
    "The visualization will consist of 12 bars representing frequency ranges.",
)
# Launch the Gradio interface
if __name__ == "__main__":
    # Start the local Gradio server (blocks until the app is stopped).
    iface.launch()
# Dependencies
# =============
# The following dependencies are required to run this app:
# - librosa
# - numpy
# - matplotlib
# - opencv-python
# - ffmpeg (installed separately)
|