# INVIDEO_BASIC / app.py
# gnosticdev's picture
# Update app.py
# 3a7d955 verified
# raw
# history blame
# 9.23 kB
import os
import asyncio
import logging
import tempfile
import requests
import re
import math
import edge_tts
import gradio as gr
from pydub import AudioSegment
import subprocess
# Basic logging setup: INFO level, each record prefixed with timestamp and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Pexels API key, read from the environment (configure it in the Hugging Face
# Space secrets). Falls back to the "YOUR_API_KEY" placeholder when unset.
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY", "YOUR_API_KEY")
# --- Funciones optimizadas para Spaces ---
def extract_keywords(text, max_keywords=3):
    """Return the most frequent meaningful words of *text*.

    The text is lower-cased and stripped of punctuation, then split on
    whitespace. Words of three characters or fewer, and words in the small
    Spanish stop-word set, are ignored.

    Args:
        text: Free-form input text.
        max_keywords: Upper bound on the number of words returned.

    Returns:
        A list of words ordered by descending frequency; words with equal
        frequency keep the order in which they first appear in the text.
    """
    cleaned = re.sub(r'[^\w\s]', '', text.lower())

    # Common words to skip. Every entry here is at most three characters
    # long, so the length filter below already rejects them as well.
    stop_words = {"el", "la", "los", "las", "de", "en", "y", "a", "que", "es", "por", "un", "una", "con"}

    counts = {}
    for token in cleaned.split():
        if len(token) <= 3 or token in stop_words:
            continue
        counts[token] = counts.get(token, 0) + 1

    # Stable sort on the negated count: highest frequency first, ties in
    # first-seen order (dicts iterate in insertion order).
    ranked = sorted(counts, key=lambda token: -counts[token])
    return ranked[:max_keywords]
def search_pexels_videos(keywords, per_query=2):
    """Search the Pexels video API and collect one download URL per hit.

    One request is made per keyword. For every video returned, the file
    variant with the largest pixel area (width x height) is chosen.

    Args:
        keywords: Iterable of search terms.
        per_query: Number of results requested per keyword.

    Returns:
        A list of direct video file URLs. The list is empty when the API key
        is not configured or when no query produced a usable result. Errors
        on one keyword are logged and do not stop the remaining keywords.
    """
    # The module-level default for a missing key is the truthy placeholder
    # "YOUR_API_KEY", so a plain emptiness check would never fire.
    if not PEXELS_API_KEY or PEXELS_API_KEY == "YOUR_API_KEY":
        logger.error("API_KEY de Pexels no configurada")
        return []

    headers = {"Authorization": PEXELS_API_KEY}
    video_urls = []

    for query in keywords:
        try:
            response = requests.get(
                "https://api.pexels.com/videos/search",
                headers=headers,
                params={
                    "query": query,
                    "per_page": per_query,
                    "orientation": "landscape",
                    "size": "medium",
                },
                timeout=15,
            )
            if response.status_code != 200:
                # Surface auth/rate-limit problems instead of failing silently.
                logger.warning(
                    f"Pexels devolvió el estado {response.status_code} para '{query}'"
                )
                continue

            for video in response.json().get("videos") or []:
                # Only variants that actually carry a download link are usable.
                candidates = [
                    vf for vf in (video.get("video_files") or []) if vf.get("link")
                ]
                if not candidates:
                    continue
                # `or 0` guards against explicit null dimensions, which would
                # otherwise raise and discard every result of this query.
                best_quality = max(
                    candidates,
                    key=lambda vf: (vf.get("width") or 0) * (vf.get("height") or 0),
                )
                video_urls.append(best_quality["link"])
        except Exception as e:
            logger.error(f"Error buscando videos: {e}")

    return video_urls
async def generate_tts(text, output_path, voice="es-ES-ElviraNeural"):
    """Synthesize *text* to an audio file with edge-tts.

    Args:
        text: Text to be spoken.
        output_path: Destination path passed to ``Communicate.save``.
        voice: edge-tts voice name.

    Returns:
        ``True`` when the audio was saved, ``False`` if any error occurred
        (the error is logged, not raised).
    """
    try:
        await edge_tts.Communicate(text, voice).save(output_path)
    except Exception as exc:
        logger.error(f"Error en TTS: {exc}")
        return False
    return True
def download_video(url, temp_dir):
    """Download a video from a URL into a temporary directory.

    Each call writes to a freshly created, uniquely named ``video_*.mp4``
    file, so several downloads into the same directory never overwrite one
    another.

    Args:
        url: Direct URL of the video file.
        temp_dir: Existing directory in which to store the download.

    Returns:
        The path of the downloaded file, or ``None`` if the download failed
        (the error is logged and any partial file is removed).
    """
    filepath = None
    try:
        # The context manager releases the streamed connection even when the
        # transfer fails half-way.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            # mkstemp guarantees a unique name; a name derived from the
            # process id would be identical for every call in this process.
            fd, filepath = tempfile.mkstemp(
                prefix="video_", suffix=".mp4", dir=temp_dir
            )
            with os.fdopen(fd, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return filepath
    except Exception as e:
        logger.error(f"Error descargando video: {e}")
        # Do not leave a truncated file behind.
        if filepath and os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError:
                pass
        return None
def create_video(audio_path, video_paths, output_path):
    """Concatenate video clips and mux them with an audio track using FFmpeg.

    The clips are joined with FFmpeg's concat demuxer (video stream copied,
    no re-encode), the audio is encoded as AAC, and the output stops at the
    shorter of the two streams.

    The concat list is written next to the first clip and references every
    clip by absolute path, so the process working directory is never changed.

    Args:
        audio_path: Path of the audio track to add.
        video_paths: Paths of the clips to concatenate, in order.
        output_path: Path of the video file to create.

    Returns:
        ``True`` on success, ``False`` if there were no clips or FFmpeg
        failed (the error is logged, not raised).
    """
    if not video_paths:
        logger.error("Error creando video: no hay videos de entrada")
        return False

    list_file = None
    try:
        work_dir = os.path.dirname(os.path.abspath(video_paths[0]))

        # Build the list file for the concat demuxer
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", dir=work_dir, delete=False, encoding="utf-8"
        ) as f:
            list_file = f.name
            for path in video_paths:
                # Inside single quotes the concat syntax needs ' written as '\''
                escaped = os.path.abspath(path).replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        # FFmpeg command: concatenate the clips and add the audio track
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_file,
            "-i", audio_path,
            # Explicit mapping: video from the clips, audio from the supplied
            # track, so a clip's own audio stream is never selected instead.
            "-map", "0:v:0",
            "-map", "1:a:0",
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            output_path,
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except subprocess.CalledProcessError as e:
        logger.error(f"Error creando video: {e}")
        if e.stderr:
            # The tail of FFmpeg's stderr carries the actual failure reason.
            logger.error(e.stderr.decode(errors="replace")[-2000:])
        return False
    except Exception as e:
        logger.error(f"Error creando video: {e}")
        return False
    finally:
        if list_file and os.path.exists(list_file):
            os.remove(list_file)
def add_background_music(audio_path, music_path):
    """Mix background music under the main audio track.

    The music is attenuated, repeated if it is shorter than the speech,
    trimmed to the speech length and overlaid on it. The mix is exported as
    MP3 to a new temporary file that is not deleted automatically.

    Args:
        audio_path: Path of the main (speech) audio file.
        music_path: Path of the background music file.

    Returns:
        The path of the mixed MP3, or *audio_path* unchanged if anything
        went wrong (the error is logged, not raised).
    """
    try:
        voice_track = AudioSegment.from_file(audio_path)
        # Lower the music volume so it sits behind the speech
        music_track = AudioSegment.from_file(music_path) - 20
        speech_length = len(voice_track)

        # Repeat the music when it is shorter than the speech
        if len(music_track) < speech_length:
            repeats = math.ceil(speech_length / len(music_track))
            music_track = music_track * repeats

        mixed = voice_track.overlay(music_track[:speech_length])

        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
            mixed_path = handle.name
            mixed.export(mixed_path, format="mp3")
        return mixed_path
    except Exception as exc:
        logger.error(f"Error mezclando audio: {exc}")
        return audio_path
async def generate_video(text, music_file=None):
    """Generate a narrated video from *text* (main entry point for the UI).

    Steps: synthesize speech, optionally mix in background music, extract
    keywords, search and download matching clips, then assemble the final
    video. Blocking steps (HTTP requests, audio mixing, FFmpeg) run in the
    default thread-pool executor so the event loop stays responsive.

    Args:
        text: Text to narrate; also the source of the search keywords.
        music_file: Optional path to a background music file.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is ``None``
        on failure. On success only the final video is kept on disk; on
        failure the temporary directory is removed as well.
    """
    # Reject empty input before creating any temporary files
    if not text or not text.strip():
        return None, "El texto no puede estar vacío"

    loop = asyncio.get_running_loop()
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "final_video.mp4")
    output_files = []  # intermediate files, deleted once the run ends
    succeeded = False

    try:
        # 1. Generate the TTS audio
        tts_path = os.path.join(temp_dir, "audio.mp3")
        output_files.append(tts_path)
        if not await generate_tts(text, tts_path):
            return None, "Error generando voz"

        # 2. Add background music when provided
        final_audio = tts_path
        if music_file:
            mixed_audio = await loop.run_in_executor(
                None, add_background_music, tts_path, music_file
            )
            if mixed_audio != tts_path:
                final_audio = mixed_audio
                output_files.append(mixed_audio)

        # 3. Extract keywords
        keywords = extract_keywords(text)
        logger.info(f"Palabras clave identificadas: {keywords}")
        if not keywords:
            return None, "No se pudieron extraer palabras clave del texto"

        # 4. Search for and download clips
        video_urls = await loop.run_in_executor(None, search_pexels_videos, keywords)
        if not video_urls:
            return None, "No se encontraron videos para las palabras clave"

        video_paths = []
        for url in video_urls:
            path = await loop.run_in_executor(None, download_video, url, temp_dir)
            if path:
                video_paths.append(path)
                output_files.append(path)

        if not video_paths:
            return None, "Error descargando videos"

        # 5. Assemble the final video
        created = await loop.run_in_executor(
            None, create_video, final_audio, video_paths, output_path
        )
        if created:
            succeeded = True
            return output_path, "Video creado exitosamente"
        return None, "Error en la creación del video"

    except Exception as e:
        logger.exception("Error inesperado")
        return None, f"Error: {str(e)}"

    finally:
        # Intermediates are never returned to the caller, so remove them to
        # avoid filling the disk over many runs.
        for path in output_files:
            _remove_quietly(path)
        if not succeeded:
            # Nothing usable was produced: drop any partial output and the
            # (now empty) temporary directory too.
            _remove_quietly(output_path)
            try:
                os.rmdir(temp_dir)
            except OSError:
                pass


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        pass
# --- Gradio interface ---
with gr.Blocks(title="Generador Automático de Videos con IA", theme="soft") as demo:
    # Page header
    gr.Markdown("# 🎬 Generador Automático de Videos con IA")
    gr.Markdown("Transforma texto en videos usando contenido de Pexels y voz sintetizada")

    with gr.Row():
        # Left column: inputs and the trigger button
        with gr.Column():
            text_input = gr.Textbox(
                lines=5,
                label="Texto para el video",
                placeholder="Describe el contenido que quieres en el video...",
            )
            music_input = gr.Audio(
                type="filepath",
                label="Música de fondo (opcional)",
            )
            generate_btn = gr.Button("Generar Video", variant="primary")

        # Right column: resulting video and status text
        with gr.Column():
            video_output = gr.Video(label="Video Generado", interactive=False)
            status_output = gr.Textbox(label="Estado", interactive=False)

    # First clear the video and show a progress message without queueing,
    # then run the actual generation.
    click_event = generate_btn.click(
        fn=lambda: (None, "Procesando... (esto puede tomar 1-2 minutos)"),
        outputs=[video_output, status_output],
        queue=False,
    )
    click_event.then(
        fn=generate_video,
        inputs=[text_input, music_input],
        outputs=[video_output, status_output],
    )

    # Feature list shown below the controls
    gr.Markdown("### Características:")
    gr.Markdown("\n".join([
        "",
        "- **Extracción inteligente de palabras clave** del texto",
        "- **Búsqueda automática de videos** en Pexels",
        "- **Generación de voz** con Edge TTS",
        "- **Música de fondo opcional**",
        "- **Procesamiento eficiente** con FFmpeg",
        "",
    ]))

# Entry point for Hugging Face Spaces
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)