File size: 4,710 Bytes
7530063
 
 
 
4654b73
 
 
7530063
 
 
aaa6686
4654b73
7530063
2b4016e
05111e0
 
 
2b4016e
 
4654b73
05111e0
 
4654b73
05111e0
2b4016e
 
 
 
 
 
 
 
 
 
 
 
 
05111e0
 
 
 
 
2b4016e
aaa6686
 
 
 
05111e0
 
 
2b4016e
 
05111e0
 
2b4016e
4654b73
7530063
2b4016e
05111e0
 
 
 
4654b73
05111e0
 
4654b73
aaa6686
 
 
 
 
05111e0
aaa6686
 
 
 
 
 
 
2b4016e
aaa6686
 
 
 
05111e0
d3701bd
aaa6686
05111e0
 
 
7530063
 
 
 
2b4016e
 
 
 
 
 
 
 
4654b73
05111e0
4654b73
7530063
05111e0
7530063
4654b73
05111e0
 
 
2b4016e
4654b73
 
 
7530063
 
 
 
 
 
 
 
 
aaa6686
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# app.py
# =============
# This is a complete app.py file for a Gradio application that allows users to upload an audio file and generate a video with frequency visualization.

# Standard library
import os
import subprocess

# Third-party
import cv2
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Function to generate frequency visualization frames from audio
def generate_frequency_visualization(audio_path, fps, num_bars):
    """Render one bar-chart PNG per STFT frame of the given audio file.

    Args:
        audio_path: Path to an audio file readable by librosa.
        fps: Target video frame rate; controls the STFT hop length.
        num_bars: Number of frequency bars drawn per frame.

    Returns:
        Tuple of (frames directory path, audio duration in seconds),
        or (None, None) if anything fails.
    """
    try:
        # Load the audio at its native sampling rate.
        y, sr = librosa.load(audio_path, sr=None)
        duration = librosa.get_duration(y=y, sr=sr)
        print(f"Loaded audio file with sampling rate: {sr}, and duration: {duration} seconds.")

        if sr == 0 or len(y) == 0:
            raise ValueError("Invalid audio file: sampling rate or audio data is zero.")

        # Hop length matching the desired fps. Clamp to >= 1: when sr < fps
        # the integer division would yield 0 and librosa.stft would fail
        # with an unhelpful error.
        hop_length = max(1, int(sr / fps))
        S = np.abs(librosa.stft(y, n_fft=2048, hop_length=hop_length))
        frequencies = librosa.fft_frequencies(sr=sr)

        # Frequency-bin boundaries for the bars (num_bars + 1 edges).
        bins = np.linspace(0, len(frequencies), num_bars + 1, dtype=int)

        # Mean spectral power per bar, for every STFT frame.
        bar_heights = [
            [np.mean(S[bins[j]:bins[j + 1], i]) for j in range(num_bars)]
            for i in range(S.shape[1])
        ]

        # Remove stale frame images from any previous run: a shorter new
        # clip would otherwise pick up leftover frames from a longer one
        # when the video is assembled from the directory listing.
        if os.path.isdir('frames'):
            for leftover in os.listdir('frames'):
                if leftover.endswith('.png'):
                    os.remove(os.path.join('frames', leftover))
        os.makedirs('frames', exist_ok=True)

        # Render and save each frame. The y-limit is hoisted out of the
        # loop, and the figure is closed by handle so matplotlib does not
        # accumulate open figures across hundreds of frames.
        y_max = np.max(S)
        for i, heights in enumerate(bar_heights):
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(range(num_bars), heights, color=plt.cm.viridis(np.linspace(0, 1, num_bars)))
            ax.set_ylim(0, y_max)
            ax.axis('off')
            plt.savefig(f'frames/frame_{i:04d}.png', bbox_inches='tight', pad_inches=0)
            plt.close(fig)

        print(f"Generated {len(bar_heights)} frames for visualization.")
        return 'frames', duration
    except Exception as e:
        # Best-effort pipeline stage: report and signal failure upward.
        print(f"Error generating frequency visualization: {e}")
        return None, None

# Function to create a video from the generated frames
def create_video_from_frames(frames_directory, audio_path, fps, duration):
    """Assemble the saved PNG frames into an MP4 and mux in the audio.

    Args:
        frames_directory: Directory containing frame_*.png images.
        audio_path: Original audio file to merge into the video.
        fps: Frame rate for the output video.
        duration: Audio duration in seconds (currently unused; kept for
            interface compatibility with existing callers).

    Returns:
        Path of the final video with audio, or None on failure.
    """
    try:
        # Sorted listing keeps frames in frame_0000.png order.
        frame_files = sorted(
            os.path.join(frames_directory, f)
            for f in os.listdir(frames_directory)
            if f.endswith('.png')
        )
        if not frame_files:
            raise ValueError("No frames found to create the video.")

        # Derive video dimensions from the first frame; cv2.imread returns
        # None (rather than raising) on unreadable images, so check it.
        first_frame = cv2.imread(frame_files[0])
        if first_frame is None:
            raise ValueError("Could not read the first frame image.")
        height, width, _ = first_frame.shape

        video_path = 'output_video.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
        try:
            for frame_file in frame_files:
                frame = cv2.imread(frame_file)
                if frame is not None:
                    video_writer.write(frame)
        finally:
            # Always release the writer so the container is finalized even
            # if a frame read/write throws mid-loop.
            video_writer.release()

        # Mux audio with ffmpeg via an argument list (shell=False):
        # unlike the previous os.system f-string, this is safe for paths
        # containing spaces or shell metacharacters, and lets us check
        # the exit status instead of silently returning a missing file.
        output_path = 'output_with_audio.mp4'
        result = subprocess.run(
            ['ffmpeg', '-i', video_path, '-i', audio_path,
             '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
             output_path, '-y'],
            capture_output=True,
        )
        if result.returncode != 0:
            raise RuntimeError(f"ffmpeg failed: {result.stderr.decode(errors='replace')}")

        print(f"Video created with {len(frame_files)} frames.")
        return output_path
    except Exception as e:
        print(f"Error creating video from frames: {e}")
        return None

# Gradio interface function
def process_audio(audio):
    """Gradio callback: turn an uploaded audio file into a visualization video.

    Args:
        audio: Filepath of the uploaded audio (Gradio passes None when the
            user submits without uploading a file).

    Returns:
        Path to the generated video, or None if no file was uploaded or
        any pipeline stage failed.
    """
    # Guard the no-upload case instead of crashing inside librosa.load.
    if audio is None:
        return None
    fps = 60
    num_bars = 12
    frames_directory, duration = generate_frequency_visualization(audio, fps, num_bars)
    if frames_directory:
        return create_video_from_frames(frames_directory, audio, fps, duration)
    return None

# Gradio UI: single audio-file input mapped to a generated-video output.
# Kept as a named constant so the help text is easy to locate and edit.
_DESCRIPTION = (
    "Upload an audio file to generate a video with frequency visualization. "
    "Supported file types: WAV, MP3, FLAC. "
    "Recommended file duration: 10 seconds to 5 minutes. "
    "The visualization will consist of 12 bars representing frequency ranges."
)

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=gr.Video(label="Generated Video"),
    title="Audio Frequency Visualization",
    description=_DESCRIPTION,
)

# Start the web app only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()

# Dependencies
# =============
# The following dependencies are required to run this app:
# - librosa
# - numpy
# - matplotlib
# - opencv-python
# - ffmpeg (installed separately)