File size: 2,994 Bytes
9857383
 
 
 
 
be67e91
 
 
5ef121d
9857383
be67e91
9857383
fec5702
 
 
 
 
 
35ee7d1
be67e91
9857383
 
35ee7d1
be67e91
 
9857383
35ee7d1
be67e91
 
9857383
35ee7d1
be67e91
35ee7d1
be67e91
35ee7d1
be67e91
 
 
 
35ee7d1
be67e91
 
 
35ee7d1
be67e91
5ef121d
be67e91
35ee7d1
 
fec5702
9857383
be67e91
35ee7d1
 
be67e91
 
 
 
 
 
 
9857383
 
fec5702
 
9857383
35ee7d1
9857383
35ee7d1
9857383
35ee7d1
 
9857383
35ee7d1
 
be67e91
9857383
 
88eef70
be67e91
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import moviepy.editor as mp
import numpy as np
import librosa
import matplotlib.pyplot as plt
from PIL import Image
import tempfile
import os
import io  # Importar el módulo 'io'

def generate_waveform_video(audio_file, image_file):
    """Render an MP4 overlaying an animated waveform on a static background image.

    Parameters:
        audio_file: filesystem path to the audio track (WAV/MP3, anything librosa reads).
        image_file: filesystem path to the background image.

    Returns:
        Path to a temporary .mp4 file containing the composited video.

    Raises:
        Exception: wraps any underlying failure with a descriptive message
        so Gradio can surface it to the user.
    """
    try:
        # Validate inputs up-front before any heavy decoding work.
        if not os.path.exists(audio_file):
            raise ValueError("Archivo de audio no encontrado")
        if not os.path.exists(image_file):
            raise ValueError("Archivo de imagen no encontrado")

        # 1. Load audio samples and total duration.
        y, sr = librosa.load(audio_file)
        duration = librosa.get_duration(y=y, sr=sr)

        # 2. Background image clip spanning the whole audio.
        img_clip = mp.ImageClip(image_file).set_duration(duration)
        img_w, img_h = img_clip.size

        # 3. Amplitude envelope scaled to one third of the image height.
        audio_envelope = np.abs(y)
        peak = np.max(audio_envelope)
        if peak > 0:  # guard: silent audio would otherwise divide by zero
            audio_envelope = (audio_envelope / peak) * (img_h // 3)

        def make_frame(t):
            # Draw a +/-0.1 s window of the envelope centred on time t.
            fig, ax = plt.subplots(figsize=(img_w / 100, img_h / 100), dpi=100)
            ax.set_xlim(0, duration)
            ax.set_ylim(-img_h // 2, img_h // 2)
            ax.axis('off')

            time_idx = int(t * sr)
            start = max(0, time_idx - sr // 10)
            end = min(len(audio_envelope), time_idx + sr // 10)
            wave_slice = audio_envelope[start:end]

            x = np.linspace(t - 0.1, t + 0.1, len(wave_slice))
            ax.fill_between(x, wave_slice - img_h // 4, -wave_slice + img_h // 4,
                            facecolor='red', alpha=0.7)

            # Rasterize the figure into an in-memory PNG, then a numpy frame.
            # NOTE(review): bbox_inches='tight' can yield slightly varying
            # frame sizes across t — confirm the composite tolerates this.
            buf = io.BytesIO()
            plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
            plt.close(fig)
            buf.seek(0)  # BUGFIX: rewind — savefig leaves the stream at EOF,
                         # so Image.open would fail with "cannot identify image file"
            # BUGFIX: force RGB — matplotlib PNGs include an alpha channel and
            # MoviePy's make_frame contract expects a 3-channel RGB array.
            return np.array(Image.open(buf).convert("RGB"))

        # Compose waveform clip over the background and attach the audio track.
        effect_clip = mp.VideoClip(make_frame, duration=duration).set_fps(24)
        final_clip = mp.CompositeVideoClip([img_clip, effect_clip.set_pos("center")])
        final_clip = final_clip.set_audio(mp.AudioFileClip(audio_file))

        # Write to a temp file that outlives this function (delete=False) so
        # Gradio can stream it back to the client.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
            final_clip.write_videofile(tmpfile.name, fps=24, codec="libx264",
                                       audio_codec="aac", logger=None)
            return tmpfile.name

    except Exception as e:
        # Re-raise with context so the Gradio UI shows a readable error.
        # (Fixed mojibake in the original message: "generaci贸n" -> "generación".)
        raise Exception(f"Error durante la generación: {str(e)}")

# Gradio front-end: two file-upload widgets feed the generator, which
# returns the path of the rendered MP4 for the video output component.
_audio_input = gr.Audio(type="filepath", label="Audio (WAV/MP3)")
_image_input = gr.Image(type="filepath", label="Imagen de Fondo")
_video_output = gr.Video(label="Video Resultante", format="mp4")

iface = gr.Interface(
    fn=generate_waveform_video,
    inputs=[_audio_input, _image_input],
    outputs=_video_output,
    title="Generador de Video con Efectos de Audio",
    description="Crea videos con efectos visuales sincronizados con el audio. Actualmente soporta efecto de waveform.",
)

# Script entry point: enable the request queue so long video renders do not
# hit Gradio's default request timeout, then start the web server.
if __name__ == "__main__":
    iface.queue().launch()