'''
import pandas as pd
from gradio_client import Client

# 读取CSV文件
df = pd.read_csv("wan_gen_videos_captioned/metadata.csv")

# 初始化Gradio客户端
client = Client("http://localhost:7860")

# 逐行处理DataFrame
for index, row in df.iterrows():
    try:
        # 构建文件路径
        file_path = f"wan_gen_videos_captioned/{row['file_name']}"
        
        # 调用API
        result = client.predict(
            file_path,  # 视频文件路径
            row['prompt'],  # 对应的prompt
            4.5,  # CFG Scale
            50,  # Steps
            1,  # Sample Nums
            fn_index=1
        )
        
        # 打印结果
        print(f"处理成功 - 行 {index}: {result}")
        
    except Exception as e:
        # 捕获异常并继续处理下一行
        print(f"处理失败 - 行 {index}: {str(e)}")
        continue

print("所有行处理完成")


import pandas as pd
import pathlib
import numpy as np
import shutil
import os

# 获取HunyuanVideo-Foley目录下的MP4文件路径
l0 = pd.Series(list(pathlib.Path(".").rglob("*.mp4"))).map(str).map(
    lambda x: x if "17563" in x else np.nan
).dropna().sort_values().values.tolist()

# 读取CSV文件
df = pd.read_csv("wan_gen_videos_captioned/metadata.csv")
print(df.shape, len(l0))
df["file_name"] = pd.Series(l0).map(lambda x: x.split("/")[-1]).values.tolist()

# 创建目标文件夹
target_folder = "wan_gen_videos_HunyuanVideo_Foley_sound_captioned"
os.makedirs(target_folder, exist_ok=True)

# 处理每一行数据
for index, row in df.iterrows():
    try:
        # 获取源文件路径和目标文件路径
        source_file = None
        for file_path in l0:
            if row['file_name'] in file_path:
                source_file = file_path
                break
        
        if source_file:
            # 复制MP4文件
            target_mp4 = os.path.join(target_folder, row['file_name'])
            shutil.copy2(source_file, target_mp4)
            
            # 创建对应的文本文件
            txt_filename = os.path.splitext(row['file_name'])[0] + '.txt'
            txt_filepath = os.path.join(target_folder, txt_filename)
            
            with open(txt_filepath, 'w', encoding='utf-8') as f:
                f.write(row['prompt'])
            
            print(f"成功处理: {row['file_name']} -> {txt_filename}")
        else:
            print(f"警告: 未找到文件 {row['file_name']}")
            
    except Exception as e:
        print(f"处理失败 - 行 {index}: {str(e)}")
        continue

print("所有文件处理完成！")
print(f"文件已保存到: {target_folder}")

df.to_csv("wan_gen_videos_HunyuanVideo_Foley_sound_captioned/metadata.csv", index = False)

!cp README.md wan_gen_videos_HunyuanVideo_Foley_sound_captioned
!huggingface-cli upload svjack/wan_gen_videos_HunyuanVideo_Foley_sound_captioned wan_gen_videos_HunyuanVideo_Foley_sound_captioned --repo-type dataset
'''

import spaces
import os
import tempfile
import gradio as gr
import torch
import torchaudio
from loguru import logger
from typing import Optional, Tuple
import random
import numpy as np
from huggingface_hub import snapshot_download
import shutil

from hunyuanvideo_foley.utils.model_utils import load_model
from hunyuanvideo_foley.utils.feature_utils import feature_process
from hunyuanvideo_foley.utils.model_utils import denoise_process
from hunyuanvideo_foley.utils.media_utils import merge_audio_video

# Global state shared between auto_load_models() (writer) and
# infer_single_video() (reader). All three stay None until a load succeeds.
model_dict = None  # first value returned by load_model()
cfg = None  # second value returned by load_model()
device = None  # torch.device selected by setup_device()

# Model directory: taken from the HIFI_FOLEY_MODEL_PATH environment variable
# when set, otherwise ./pretrained_models/ relative to the working directory.
MODEL_PATH = os.environ.get("HIFI_FOLEY_MODEL_PATH", "./pretrained_models/")
# Model configuration file, resolved relative to the working directory.
CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"

def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_dir: str = "./pretrained_models") -> str:
    """Fetch a full HuggingFace repository snapshot into a local directory.

    Args:
        repo_id: HuggingFace repository identifier to download.
        local_dir: Directory receiving the files; created when missing.

    Returns:
        A status message starting with "✅" on success or "❌" on failure.
        Exceptions are not propagated: the failure message embeds the error
        text and is also written to the log.
    """
    fetch_options = dict(
        repo_id=repo_id,
        local_dir=local_dir,
        resume_download=True,  # continue partially downloaded files
        local_files_only=False,  # fetching from the network is permitted
    )

    try:
        logger.info(f"开始从HuggingFace下载模型：{repo_id}")
        logger.info(f"下载目标目录：{local_dir}")

        # The target directory has to exist before the snapshot is written.
        os.makedirs(local_dir, exist_ok=True)

        # Pull the whole repository.
        snapshot_download(**fetch_options)

        logger.info(f"✅ 模型下载成功！保存在：{local_dir}")
        return f"✅ 模型从 {repo_id} 下载成功！"
    except Exception as exc:
        failure_text = f"❌ 模型下载失败：{exc!s}"
        logger.error(failure_text)
        return failure_text

def setup_device(device_str: str = "auto", gpu_id: int = 0) -> torch.device:
    """Resolve the torch device to run on and log the choice.

    Args:
        device_str: "auto" picks CUDA, then MPS, then CPU depending on
            availability. "cuda" selects the CUDA device with index
            ``gpu_id``. Any other value is passed to ``torch.device`` as is.
        gpu_id: CUDA device index used whenever a CUDA device is selected.

    Returns:
        The selected ``torch.device``.
    """
    # Explicit request: honour it without probing the hardware.
    if device_str != "auto":
        target = f"cuda:{gpu_id}" if device_str == "cuda" else device_str
        chosen = torch.device(target)
        logger.info(f"Using specified device: {chosen}")
        return chosen

    # Automatic selection, in order of preference.
    if torch.cuda.is_available():
        chosen = torch.device(f"cuda:{gpu_id}")
        logger.info(f"Using CUDA device: {chosen}")
        return chosen

    if torch.backends.mps.is_available():
        chosen = torch.device("mps")
        logger.info("Using MPS device")
        return chosen

    chosen = torch.device("cpu")
    logger.info("Using CPU device")
    return chosen

def auto_load_models() -> str:
    """Download the preset model files and load them into the module globals.

    On success the module-level ``model_dict`` and ``cfg`` are populated from
    ``load_model`` and ``device`` holds the device chosen by ``setup_device``.
    Errors are never raised to the caller; they are logged and reported
    through the returned message.

    Returns:
        "✅ 模型加载成功!" on success, otherwise a message starting with "❌"
        (download failure, missing configuration file, or load error).
    """
    global model_dict, cfg, device

    try:
        # The download is attempted on every start, whether or not the
        # directory already exists. The log states which case applies
        # instead of always claiming that the path is missing.
        # NOTE(review): this relies on snapshot_download reusing files that
        # are already present locally — confirm against huggingface_hub docs.
        if os.path.exists(MODEL_PATH):
            logger.info(f"模型路径 {MODEL_PATH} 已存在，从HuggingFace检查并补全模型文件...")
        else:
            logger.info(f"模型路径 {MODEL_PATH} 不存在，开始从HuggingFace下载...")
        download_result = download_model_from_hf(local_dir=MODEL_PATH.rstrip('/'))
        # download_model_from_hf reports failure through its message text.
        if "失败" in download_result:
            return download_result

        # If the configuration file is missing, try fetching the repository
        # into the working directory, but only when the config lives under
        # "configs/" and that directory does not exist yet.
        if not os.path.exists(CONFIG_PATH):
            logger.info(f"配置文件 {CONFIG_PATH} 不存在，尝试从HuggingFace下载...")
            config_dir = os.path.dirname(CONFIG_PATH)
            if CONFIG_PATH.startswith("configs/") and not os.path.exists(config_dir):
                download_result = download_model_from_hf(local_dir="./")
                if "失败" in download_result:
                    return download_result

            # Final check: give up when the file is still absent.
            if not os.path.exists(CONFIG_PATH):
                return f"❌ 配置文件未找到: {CONFIG_PATH}"

        # Prefer an accelerator; setup_device falls back to CPU.
        device = setup_device("auto", 0)

        # Load model
        logger.info("正在加载模型...")
        logger.info(f"模型路径: {MODEL_PATH}")
        logger.info(f"配置路径: {CONFIG_PATH}")

        model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)

        logger.info("✅ 模型加载成功!")
        return "✅ 模型加载成功!"

    except Exception as e:
        logger.error(f"模型加载失败: {str(e)}")
        return f"❌ 模型加载失败: {str(e)}"

@spaces.GPU(duration=120)
@torch.inference_mode()
def infer_single_video(
    video_file,
    text_prompt: str,
    guidance_scale: float = 4.5,
    num_inference_steps: int = 50,
    sample_nums: int = 1
) -> Tuple[list, str]:
    """Generate audio for one video and mux it back into the video.

    For each requested sample the generated audio is saved as a WAV file in a
    fresh temporary directory, merged with the input video, and the merged
    file is additionally copied into the current working directory as
    ``<unix-seconds>_video_with_audio_<n>.mp4``. The temporary directory is
    not removed here because the returned paths point into it.

    Args:
        video_file: Path of the input video, as supplied by the UI.
        text_prompt: Optional description of the desired audio; ``None`` is
            treated as an empty prompt and surrounding whitespace is stripped.
        guidance_scale: Guidance value forwarded to ``denoise_process``.
        num_inference_steps: Step count forwarded to ``denoise_process``.
        sample_nums: Number of audio samples to generate (used as batch size
            and as the number of output videos).

    Returns:
        ``(video_paths, status_message)``. On any failure the list is empty
        and the message starts with "❌"; exceptions are logged, not raised.
    """
    global model_dict, cfg, device

    # `time` is not imported at module level; import it once per call rather
    # than on every loop iteration.
    import time

    if model_dict is None or cfg is None:
        return [], "❌ Please load the model first!"

    if video_file is None:
        return [], "❌ Please upload a video file!"

    # Allow empty text prompt, use empty string if no prompt provided
    if text_prompt is None:
        text_prompt = ""
    text_prompt = text_prompt.strip()

    try:
        logger.info(f"Processing video: {video_file}")
        logger.info(f"Text prompt: {text_prompt}")

        # Feature processing
        visual_feats, text_feats, audio_len_in_s = feature_process(
            video_file,
            text_prompt,
            model_dict,
            cfg
        )

        # Denoising process to generate multiple audio samples.
        # Per the original author's note, denoise_process returns audio
        # shaped [batch_size, channels, samples]; audio[i] is one sample.
        logger.info(f"Generating {sample_nums} audio samples...")
        audio, sample_rate = denoise_process(
            visual_feats,
            text_feats,
            audio_len_in_s,
            model_dict,
            cfg,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            batch_size=sample_nums
        )

        # Create temporary files to save results
        temp_dir = tempfile.mkdtemp()
        video_outputs = []

        # Process each generated audio sample
        for i in range(sample_nums):
            # Save audio file
            audio_output = os.path.join(temp_dir, f"generated_audio_{i+1}.wav")
            torchaudio.save(audio_output, audio[i], sample_rate)

            # Merge video and audio
            video_output = os.path.join(temp_dir, f"video_with_audio_{i+1}.mp4")
            seconds = int(time.time())
            merge_audio_video(audio_output, video_file, video_output)

            # Keep a timestamped copy in the working directory.
            # os.path.basename is used instead of splitting on "/" so the
            # name is extracted correctly whatever the path separator is.
            export_name = f"{seconds}_{os.path.basename(video_output)}"
            shutil.copy2(video_output, os.path.join(".", export_name))

            video_outputs.append(video_output)

        logger.info(f"Inference completed! Generated {sample_nums} samples.")
        return video_outputs, f"✅ Generated {sample_nums} audio sample(s) successfully!"

    except Exception as e:
        logger.error(f"Inference failed: {str(e)}")
        return [], f"❌ Inference failed: {str(e)}"

def update_video_outputs(video_list, status_msg):
    """Pad the generated video paths to six slots and append the status text.

    Args:
        video_list: Sliceable sequence of video paths; entries beyond the
            sixth are ignored.
        status_msg: Status value placed in the last position of the result.

    Returns:
        A 7-tuple: six video slots (``None`` where no video was generated)
        followed by ``status_msg``.
    """
    slot_count = 6  # number of video players in the results column
    shown = list(video_list[:slot_count])
    padding = [None] * (slot_count - len(shown))
    return (*shown, *padding, status_msg)

def create_gradio_interface():
    """Build the Gradio Blocks application for HunyuanVideo-Foley.

    Layout, top to bottom: header, quick-start guide, an input column (video,
    prompt, three sliders, generate button) next to a results column (six
    video slots plus a status box), an examples grid, and a footer.

    Event wiring:
        * Changing the sample-count slider shows or hides result slots 2-6.
        * The generate button runs ``infer_single_video`` and fills the six
          result slots and the status box.
        * Each example button loads that example's video, caption and
          pre-rendered result into the interface.

    Returns:
        The constructed ``gr.Blocks`` instance; it is not launched here.
    """
    
    # Custom CSS for beautiful interface with better contrast
    css = """
    .gradio-container {
        font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        min-height: 100vh;
    }
    
    .main-header {
        text-align: center;
        padding: 2rem 0;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        margin-bottom: 2rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.15);
    }
    
    .main-header h1 {
        color: white;
        font-size: 3rem;
        font-weight: 700;
        margin-bottom: 0.5rem;
        text-shadow: 0 2px 10px rgba(0,0,0,0.3);
    }
    
    .main-header p {
        color: rgba(255, 255, 255, 0.95);
        font-size: 1.2rem;
        font-weight: 300;
    }
    
    .status-card {
        background: white;
        border-radius: 15px;
        padding: 1rem;
        margin-bottom: 1.5rem;
        border: 1px solid #e1e5e9;
        box-shadow: 0 4px 20px rgba(0,0,0,0.08);
    }
    
    .status-card label {
        color: #2d3748 !important;
        font-weight: 600 !important;
    }
    
    .usage-guide h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 0.5rem !important;
    }
    
    .usage-guide p {
        color: #4a5568 !important;
        font-size: 1rem !important;
        line-height: 1.6 !important;
        margin: 0.5rem 0 !important;
    }
    
    .usage-guide strong {
        color: #1a202c !important;
        font-weight: 700 !important;
    }
    
    .usage-guide em {
        color: #1a202c !important;
        font-weight: 700 !important;
        font-style: normal !important;
    }
    
    .main-interface {
        margin-bottom: 2rem;
    }
    
    .input-section {
        background: white;
        border-radius: 20px;
        padding: 2rem;
        margin-right: 1rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid #e1e5e9;
    }
    
    .input-section h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 1rem !important;
    }
    
    .input-section label {
        color: #4a5568 !important;
        font-weight: 500 !important;
    }
    
    .output-section {
        background: white;
        border-radius: 20px;
        padding: 2rem;
        margin-left: 1rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid #e1e5e9;
    }
    
    .output-section h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 1rem !important;
    }
    
    .output-section label {
        color: #4a5568 !important;
        font-weight: 500 !important;
    }
    
    .examples-section h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 1.5rem !important;
    }
    
    .generate-btn {
        background: linear-gradient(45deg, #667eea, #764ba2) !important;
        border: none !important;
        color: white !important;
        font-weight: 600 !important;
        font-size: 1.1rem !important;
        padding: 12px 30px !important;
        border-radius: 25px !important;
        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
        transition: all 0.3s ease !important;
    }
    
    .generate-btn:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
    }
    

    .examples-section {
        background: white;
        border-radius: 20px;
        padding: 2rem;
        margin-top: 2rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid #e1e5e9;
    }
    
    .examples-section p {
        color: #4a5568 !important;
        margin-bottom: 1rem !important;
    }
    
    .example-row {
        background: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 15px;
        padding: 1.5rem;
        margin: 1rem 0;
        transition: all 0.3s ease;
        align-items: center;
    }
    
    .example-row:hover {
        border-color: #667eea;
        transform: translateY(-2px);
        box-shadow: 0 4px 20px rgba(102, 126, 234, 0.15);
    }
    
    .example-row .markdown {
        color: #2d3748 !important;
    }
    
    .example-row .markdown p {
        color: #2d3748 !important;
        margin: 0.5rem 0 !important;
        line-height: 1.5 !important;
    }
    
    .example-row .markdown strong {
        color: #1a202c !important;
        font-weight: 600 !important;
    }
    
    /* Example grid layout styles */
    .example-grid-row {
        margin: 1rem 0;
        gap: 1rem;
    }
    
    .example-item {
        background: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 15px;
        padding: 1rem;
        transition: all 0.3s ease;
        margin: 0.25rem;
        max-width: 250px;
        margin-left: auto;
        margin-right: auto;
    }
    
    .example-item:hover {
        border-color: #667eea;
        transform: translateY(-2px);
        box-shadow: 0 4px 20px rgba(102, 126, 234, 0.15);
    }
    
    .example-caption {
        margin: 0.5rem 0 !important;
        min-height: 2.8rem !important;
        display: flex !important;
        align-items: flex-start !important;
    }
    
    .example-caption p {
        color: #2d3748 !important;
        font-size: 0.9rem !important;
        line-height: 1.4 !important;
        margin: 0.5rem 0 !important;
    }
    
    /* Multi-video gallery styles */
    .additional-samples {
        margin-top: 1rem;
        gap: 0.5rem;
    }
    
    .additional-samples .gradio-video {
        border-radius: 10px;
        overflow: hidden;
    }
    
    /* Video gallery responsive layout */
    .video-gallery {
        display: grid;
        gap: 1rem;
        margin-top: 1rem;
    }
    
    .video-gallery.single {
        grid-template-columns: 1fr;
    }
    
    .video-gallery.dual {
        grid-template-columns: 1fr 1fr;
    }
    
    .video-gallery.multi {
        grid-template-columns: repeat(2, 1fr);
        grid-template-rows: auto auto auto;
    }
    
    .footer-text {
        color: #718096 !important;
        text-align: center;
        padding: 2rem;
        font-size: 0.9rem;
    }
    
    /* Video component styling for consistent size */
    .input-section video,
    .output-section video,
    .example-row video {
        width: 100% !important;
        height: 300px !important;
        object-fit: contain !important;
        border-radius: 10px !important;
        background-color: #000 !important;
    }
    
    .example-row video {
        height: 150px !important;
    }
    
    /* Fix for additional samples video display */
    .additional-samples video {
        height: 150px !important;
        object-fit: contain !important;
        border-radius: 10px !important;
        background-color: #000 !important;
    }
    
    .additional-samples .gradio-video {
        border-radius: 10px !important;
        overflow: hidden !important;
        background-color: #000 !important;
    }
    
    .additional-samples .gradio-video > div {
        background-color: #000 !important;
        border-radius: 10px !important;
    }
    
    /* Video container styling */
    .input-section .video-container,
    .output-section .video-container,
    .example-row .video-container {
        background-color: #000 !important;
        border-radius: 10px !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
        overflow: hidden !important;
    }
    
    /* Ensure proper alignment */
    .example-row {
        display: flex !important;
        align-items: stretch !important;
    }
    
    .example-row > div {
        display: flex !important;
        flex-direction: column !important;
        justify-content: center !important;
    }
    
    /* Video wrapper for better control */
    .video-wrapper {
        position: relative !important;
        width: 100% !important;
        background: #000 !important;
        border-radius: 10px !important;
        overflow: hidden !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
    }
    """
    
    with gr.Blocks(css=css, title="HunyuanVideo-Foley") as app:
        
        # Main header
        with gr.Column(elem_classes=["main-header"]):
            gr.HTML("""
            <h1>🎵 HunyuanVideo-Foley</h1>
            <p>Text-Video-to-Audio Synthesis: Generate realistic audio from video and text descriptions</p>
            """)
        
        # Usage Guide
        with gr.Column(elem_classes=["status-card"]):
            gr.Markdown("""
            ### 📋 Quick Start Guide
            **1.** Upload your video file\t**2.** Add optional text description\t**3.** Adjust sample numbers (1-6)\t**4.** Click Generate Audio
            
            💡 For quick start, you can load the prepared examples by clicking the button.
            """, elem_classes=["usage-guide"])
        
        # Main inference interface - Input and Results side by side
        with gr.Row(elem_classes=["main-interface"]):
            # Input section
            with gr.Column(scale=1, elem_classes=["input-section"]):
                gr.Markdown("### 📹 Video Input")
                
                video_input = gr.Video(
                    label="Upload Video",
                    height=300
                )
                
                text_input = gr.Textbox(
                    label="🎯 Audio Description (English)",
                    placeholder="A person walks on frozen ice",
                    lines=3,
                )
                
                # Generation parameters; the slider defaults match the
                # keyword defaults of infer_single_video (4.5 / 50 / 1).
                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.5,
                        step=0.1,
                        label="🎚️ CFG Scale",
                    )
                    
                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ Steps",
                    )
                    
                    sample_nums = gr.Slider(
                        minimum=1,
                        maximum=6,
                        value=1,
                        step=1,
                        label="🎲 Sample Nums",
                    )
                
                generate_btn = gr.Button(
                    "🎵 Generate Audio", 
                    variant="primary",
                    elem_classes=["generate-btn"]
                )
            
            # Results section
            with gr.Column(scale=1, elem_classes=["output-section"]):
                gr.Markdown("### 🎥 Generated Results")
                
                # Multi-video gallery for displaying multiple generated samples
                with gr.Column():
                    # Primary video (Sample 1)
                    video_output_1 = gr.Video(
                        label="Sample 1",
                        height=250,
                        visible=True
                    )
                    
                    # Additional videos (Samples 2-5) in two columns - initially hidden
                    with gr.Row(elem_classes=["additional-samples"]):
                        with gr.Column(scale=1):
                            video_output_2 = gr.Video(
                                label="Sample 2",
                                height=150,
                                visible=False
                            )
                            video_output_3 = gr.Video(
                                label="Sample 3", 
                                height=150,
                                visible=False
                            )
                        with gr.Column(scale=1):
                            video_output_4 = gr.Video(
                                label="Sample 4",
                                height=150,
                                visible=False
                            )
                            video_output_5 = gr.Video(
                                label="Sample 5",
                                height=150,
                                visible=False
                            )
                    
                    # Sample 6 - full width, initially hidden
                    video_output_6 = gr.Video(
                        label="Sample 6",
                        height=150,
                        visible=False
                    )
                
                result_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=2
                )
        
        # Examples section at the bottom
        with gr.Column(elem_classes=["examples-section"]):
            gr.Markdown("### 🌟 Examples")
            gr.Markdown("Click on any example to load it into the interface above")
            
            # Define your custom examples here - 8 examples total.
            # Each entry holds the caption, the input video path and the path
            # of a pre-rendered result video (paths are relative to the
            # working directory).
            examples_data = [
                # Example 1
                {
                    "caption": "A person walks on frozen ice",
                    "video_path": "examples/1_video.mp4",
                    "result_path": "examples/1_result.mp4"
                },
                # Example 2
                {
                    "caption": "With a faint sound as their hands parted, the two embraced, a soft 'mm' escaping between them.",
                    "video_path": "examples/2_video.mp4",
                    "result_path": "examples/2_result.mp4"
                },
                # Example 3
                {
                    "caption": "The sound of the number 3's bouncing footsteps is as light and clear as glass marbles hitting the ground. Each step carries a magical sound.", 
                    "video_path": "examples/3_video.mp4",
                    "result_path": "examples/3_result.mp4"
                },
                # Example 4
                {
                    "caption": "gentle gurgling of the stream's current, and music plays in the background which is a beautiful and serene piano solo with a hint of classical charm, evoking a sense of peace and serenity in people's hearts.",
                    "video_path": "examples/4_video.mp4",
                    "result_path": "examples/4_result.mp4"
                },
                # Example 5 - Add your new examples here
                {
                    "caption": "snow crunching under the snowboard's edge.",
                    "video_path": "examples/5_video.mp4",
                    "result_path": "examples/5_result.mp4"
                },
                # Example 6
                {
                    "caption": "The crackling of the fire, the whooshing of the flames, and the occasional crisp popping of charred leaves filled the forest.",
                    "video_path": "examples/6_video.mp4",
                    "result_path": "examples/6_result.mp4"
                },
                # Example 7
                {
                    "caption": "humming of the scooter engine accelerates slowly.",
                    "video_path": "examples/7_video.mp4",
                    "result_path": "examples/7_result.mp4"
                },
                # Example 8
                {
                    "caption": "splash of water and loud thud as person hits the surface.",
                    "video_path": "examples/8_video.mp4",
                    "result_path": "examples/8_result.mp4"
                }
            ]
            
            # Create example grid - 4 examples per row, 2 rows total.
            # example_buttons collects (button, example dict) pairs so the
            # click handlers can be attached after all components exist.
            example_buttons = []
            for row in range(2):  # 2 rows
                with gr.Row(elem_classes=["example-grid-row"]):
                    for col in range(4):  # 4 columns
                        idx = row * 4 + col
                        if idx < len(examples_data):
                            example = examples_data[idx]
                            
                            with gr.Column(scale=1, elem_classes=["example-item"]):
                                # Video thumbnail, or an HTML placeholder when
                                # the example file is missing on disk.
                                # example_video is not referenced afterwards;
                                # the component is registered by construction.
                                if os.path.exists(example['video_path']):
                                    example_video = gr.Video(
                                        value=example['video_path'],
                                        label=f"Example {idx+1}",
                                        interactive=False,
                                        show_label=True,
                                        height=180
                                    )
                                else:
                                    example_video = gr.HTML(f"""
                                    <div style="background: #f0f0f0; padding: 15px; text-align: center; border-radius: 8px; height: 180px; display: flex; align-items: center; justify-content: center;">
                                        <div>
                                            <p style="color: #666; margin: 0; font-size: 12px;">📹 Video not found</p>
                                            <small style="color: #999; font-size: 10px;">{example['video_path']}</small>
                                        </div>
                                    </div>
                                    """)
                                
                                # Caption (truncated to 60 characters plus "..." for grid layout)
                                caption_preview = example['caption'][:60] + "..." if len(example['caption']) > 60 else example['caption']
                                gr.Markdown(f"{caption_preview}", elem_classes=["example-caption"])
                                
                                # Load button
                                example_btn = gr.Button(
                                    f"Load Example {idx+1}",
                                    variant="secondary",
                                    size="sm"
                                )
                                example_buttons.append((example_btn, example))
        
        # Event handlers
        # Runs inference and maps the result onto the six video slots plus
        # the status box.
        def process_inference(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            # Generate videos; the slider value is cast to int before use
            video_list, status_msg = infer_single_video(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )
            # Pad to six slot values and append the status message. This only
            # sets values; slot visibility is handled by update_visibility.
            return update_video_outputs(video_list, status_msg)
        
        # Add dynamic visibility control based on sample_nums:
        # slot k is shown when at least k samples are requested.
        def update_visibility(sample_nums):
            sample_nums = int(sample_nums)
            return [
                gr.update(visible=True),  # Sample 1 always visible
                gr.update(visible=sample_nums >= 2),  # Sample 2
                gr.update(visible=sample_nums >= 3),  # Sample 3
                gr.update(visible=sample_nums >= 4),  # Sample 4
                gr.update(visible=sample_nums >= 5),  # Sample 5
                gr.update(visible=sample_nums >= 6),  # Sample 6
            ]
        
        # Update visibility when sample_nums changes
        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=[video_output_1, video_output_2, video_output_3, video_output_4, video_output_5, video_output_6]
        )
        
        # The output order must match the 7-tuple built by update_video_outputs.
        generate_btn.click(
            fn=process_inference,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=[
                video_output_1,  # Sample 1 value
                video_output_2,  # Sample 2 value  
                video_output_3,  # Sample 3 value
                video_output_4,  # Sample 4 value
                video_output_5,  # Sample 5 value
                video_output_6,  # Sample 6 value
                result_text
            ]
        )
        
        # Add click handlers for example buttons
        for btn, example in example_buttons:
            # Factory: binds the current example as `ex` so each handler keeps
            # its own example rather than the loop variable's final value.
            def create_example_handler(ex):
                def handler():
                    # Use each file only if it exists; a missing file yields
                    # None for that output and a warning in the status text.
                    if os.path.exists(ex['video_path']):
                        video_file = ex['video_path']
                    else:
                        video_file = None
                        
                    if os.path.exists(ex['result_path']):
                        result_video = ex['result_path']
                    else:
                        result_video = None
                    
                    status_msg = f"✅ Loaded example with caption: {ex['caption'][:50]}..."
                    if not video_file:
                        status_msg += f"\n⚠️ Video file not found: {ex['video_path']}"
                    if not result_video:
                        status_msg += f"\n⚠️ Result video not found: {ex['result_path']}"
                        
                    # Order matches the outputs list of btn.click below.
                    return video_file, ex['caption'], result_video, status_msg
                return handler
            
            btn.click(
                fn=create_example_handler(example),
                outputs=[video_input, text_input, video_output_1, result_text]
            )
        
        # Footer
        gr.HTML("""
        <div class="footer-text">
            <p>🚀 Powered by HunyuanVideo-Foley | Generate high-quality audio from video and text descriptions</p>
        </div>
        """)
    
    return app

def set_manual_seed(global_seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        global_seed: Seed value passed unchanged to each generator.
    """
    # Same order as before: stdlib random, then NumPy, then PyTorch.
    seeders = (random.seed, np.random.seed, torch.manual_seed)
    for apply_seed in seeders:
        apply_seed(global_seed)

if __name__ == "__main__":
    # Script entry point: seed the RNGs, configure logging, load the model,
    # build the UI and start the Gradio server.
    set_manual_seed(1)

    # Setup logging: replace the existing loguru handlers with one that
    # prints INFO and above. end='' avoids adding a second newline, assuming
    # the formatted message already ends with one.
    logger.remove()
    logger.add(lambda msg: print(msg, end=''), level="INFO")

    # Auto-load model
    logger.info("Starting application and loading model...")
    model_load_result = auto_load_models()
    logger.info(model_load_result)

    # Create the Gradio app; the UI is built even if loading failed
    app = create_gradio_interface()

    # Log completion status. auto_load_models() prefixes success messages
    # with "✅" and failures with "❌"; its messages never contain the word
    # "successfully", so the prefix is what has to be checked.
    if model_load_result.startswith("✅"):
        logger.info("Application ready, model loaded")
    else:
        logger.warning(
            "Application starting without a loaded model; "
            "inference requests will be rejected"
        )

    # Launch the server on all network interfaces
    app.launch(
        server_name="0.0.0.0",
        #  server_port=8080,
        share=False,
        debug=False,
        show_error=True
    )