import spaces
import os
import tempfile
import gradio as gr
import torch
import torchaudio
from loguru import logger
from typing import Tuple
import random
import numpy as np
from huggingface_hub import snapshot_download

from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process
from hunyuanvideo_foley.utils.feature_utils import feature_process
from hunyuanvideo_foley.utils.media_utils import merge_audio_video

# Globals populated once by auto_load_models().
model_dict = None
cfg = None
device = None

MODEL_PATH = os.environ.get("HIFI_FOLEY_MODEL_PATH", "./pretrained_models/")
CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"
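# The checkpoint directory can be redirected without editing this file, e.g.
# (assuming this script is saved as app.py):
#   HIFI_FOLEY_MODEL_PATH=/data/models/foley python app.py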


def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_dir: str = "./pretrained_models") -> str:
    """Download the model from the HuggingFace Hub into a local directory."""
    try:
        logger.info(f"Downloading model from HuggingFace: {repo_id}")
        logger.info(f"Target directory: {local_dir}")

        os.makedirs(local_dir, exist_ok=True)

        snapshot_download(
            repo_id=repo_id,
            local_dir=local_dir,
            resume_download=True,
            local_files_only=False,
        )

        logger.info(f"✅ Model downloaded successfully! Saved in: {local_dir}")
        return f"✅ Model downloaded successfully from {repo_id}!"

    except Exception as e:
        error_msg = f"❌ Model download failed: {str(e)}"
        logger.error(error_msg)
        return error_msg
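
# Note: snapshot_download() resumes partially completed downloads, so an
# interrupted fetch can be retried safely by calling this function again.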


def setup_device(device_str: str = "auto", gpu_id: int = 0) -> torch.device:
    """Set up the computing device."""
    if device_str == "auto":
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{gpu_id}")
            logger.info(f"Using CUDA device: {device}")
        elif torch.backends.mps.is_available():
            device = torch.device("mps")
            logger.info("Using MPS device")
        else:
            device = torch.device("cpu")
            logger.info("Using CPU device")
    else:
        if device_str == "cuda":
            device = torch.device(f"cuda:{gpu_id}")
        else:
            device = torch.device(device_str)
        logger.info(f"Using specified device: {device}")

    return device
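
# e.g. setup_device("auto") prefers CUDA, then Apple MPS, then CPU;
# setup_device("cuda", 1) pins inference to the second GPU.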


def auto_load_models() -> str:
    """Automatically load the preset models."""
    global model_dict, cfg, device

    try:
        if not os.path.exists(MODEL_PATH):
            logger.info(f"Model path {MODEL_PATH} does not exist; downloading from HuggingFace...")
            download_result = download_model_from_hf(local_dir=MODEL_PATH.rstrip('/'))
            if "failed" in download_result:
                return download_result

        if not os.path.exists(CONFIG_PATH):
            logger.info(f"Config file {CONFIG_PATH} not found; trying to download it from HuggingFace...")
            if CONFIG_PATH.startswith("configs/"):
                config_dir = os.path.dirname(CONFIG_PATH)
                if not os.path.exists(config_dir):
                    download_result = download_model_from_hf(local_dir="./")
                    if "failed" in download_result:
                        return download_result

            if not os.path.exists(CONFIG_PATH):
                return f"❌ Config file not found: {CONFIG_PATH}"

        device = setup_device("auto", 0)

        logger.info("Loading model...")
        logger.info(f"Model path: {MODEL_PATH}")
        logger.info(f"Config path: {CONFIG_PATH}")

        model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)

        logger.info("✅ Model loaded successfully!")
        return "✅ Model loaded successfully!"

    except Exception as e:
        logger.error(f"Model loading failed: {str(e)}")
        return f"❌ Model loading failed: {str(e)}"


# Request a ZeroGPU slot for up to 120 seconds per call on HF Spaces.
@spaces.GPU(duration=120)
@torch.inference_mode()
def infer_single_video(
    video_file,
    text_prompt: str,
    guidance_scale: float = 4.5,
    num_inference_steps: int = 50,
    sample_nums: int = 1,
) -> Tuple[list, str]:
    """Run single-video inference; returns the generated videos and a status message."""
    global model_dict, cfg, device

    if model_dict is None or cfg is None:
        return [], "❌ Please load the model first!"

    if video_file is None:
        return [], "❌ Please upload a video file!"

    # Treat a missing prompt as an empty string.
    if text_prompt is None:
        text_prompt = ""
    text_prompt = text_prompt.strip()

    try:
        logger.info(f"Processing video: {video_file}")
        logger.info(f"Text prompt: {text_prompt}")

        # Extract visual and text features plus the target audio length in seconds.
        visual_feats, text_feats, audio_len_in_s = feature_process(
            video_file,
            text_prompt,
            model_dict,
            cfg
        )

        # Denoise a batch of audio samples conditioned on the features.
        logger.info(f"Generating {sample_nums} audio samples...")
        audio, sample_rate = denoise_process(
            visual_feats,
            text_feats,
            audio_len_in_s,
            model_dict,
            cfg,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            batch_size=sample_nums
        )

        # Write each generated track to a temp wav, then mux it with the input video.
        temp_dir = tempfile.mkdtemp()
        video_outputs = []

        for i in range(sample_nums):
            audio_output = os.path.join(temp_dir, f"generated_audio_{i+1}.wav")
            torchaudio.save(audio_output, audio[i], sample_rate)

            video_output = os.path.join(temp_dir, f"video_with_audio_{i+1}.mp4")
            merge_audio_video(audio_output, video_file, video_output)
            video_outputs.append(video_output)

        logger.info(f"Inference completed! Generated {sample_nums} samples.")
        return video_outputs, f"✅ Generated {sample_nums} audio sample(s) successfully!"

    except Exception as e:
        logger.error(f"Inference failed: {str(e)}")
        return [], f"❌ Inference failed: {str(e)}"


def update_video_outputs(video_list, status_msg):
    """Pad the generated videos out to the six fixed output slots of the UI."""
    outputs = [None] * 6

    for i, video_path in enumerate(video_list[:6]):
        outputs[i] = video_path

    return tuple(outputs + [status_msg])
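
# e.g. update_video_outputs(["a.mp4", "b.mp4"], "✅ done")
#   -> ("a.mp4", "b.mp4", None, None, None, None, "✅ done")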


def create_gradio_interface():
    """Create the Gradio interface."""

    css = """
    .gradio-container {
        font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        min-height: 100vh;
    }

    .main-header {
        text-align: center;
        padding: 2rem 0;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        margin-bottom: 2rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.15);
    }

    .main-header h1 {
        color: white;
        font-size: 3rem;
        font-weight: 700;
        margin-bottom: 0.5rem;
        text-shadow: 0 2px 10px rgba(0,0,0,0.3);
    }

    .main-header p {
        color: rgba(255, 255, 255, 0.95);
        font-size: 1.2rem;
        font-weight: 300;
    }

    .status-card {
        background: white;
        border-radius: 15px;
        padding: 1rem;
        margin-bottom: 1.5rem;
        border: 1px solid #e1e5e9;
        box-shadow: 0 4px 20px rgba(0,0,0,0.08);
    }

    .status-card label {
        color: #2d3748 !important;
        font-weight: 600 !important;
    }

    .usage-guide h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 0.5rem !important;
    }

    .usage-guide p {
        color: #4a5568 !important;
        font-size: 1rem !important;
        line-height: 1.6 !important;
        margin: 0.5rem 0 !important;
    }

    .usage-guide strong {
        color: #1a202c !important;
        font-weight: 700 !important;
    }

    .usage-guide em {
        color: #1a202c !important;
        font-weight: 700 !important;
        font-style: normal !important;
    }

    .main-interface {
        margin-bottom: 2rem;
    }

    .input-section {
        background: white;
        border-radius: 20px;
        padding: 2rem;
        margin-right: 1rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid #e1e5e9;
    }

    .input-section h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 1rem !important;
    }

    .input-section label {
        color: #4a5568 !important;
        font-weight: 500 !important;
    }

    .output-section {
        background: white;
        border-radius: 20px;
        padding: 2rem;
        margin-left: 1rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid #e1e5e9;
    }

    .output-section h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 1rem !important;
    }

    .output-section label {
        color: #4a5568 !important;
        font-weight: 500 !important;
    }

    .examples-section h3 {
        color: #2d3748 !important;
        font-weight: 600 !important;
        margin-bottom: 1.5rem !important;
    }

    .generate-btn {
        background: linear-gradient(45deg, #667eea, #764ba2) !important;
        border: none !important;
        color: white !important;
        font-weight: 600 !important;
        font-size: 1.1rem !important;
        padding: 12px 30px !important;
        border-radius: 25px !important;
        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
        transition: all 0.3s ease !important;
    }

    .generate-btn:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
    }

    .examples-section {
        background: white;
        border-radius: 20px;
        padding: 2rem;
        margin-top: 2rem;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid #e1e5e9;
    }

    .examples-section p {
        color: #4a5568 !important;
        margin-bottom: 1rem !important;
    }

    .example-row {
        background: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 15px;
        padding: 1.5rem;
        margin: 1rem 0;
        transition: all 0.3s ease;
        align-items: center;
    }

    .example-row:hover {
        border-color: #667eea;
        transform: translateY(-2px);
        box-shadow: 0 4px 20px rgba(102, 126, 234, 0.15);
    }

    .example-row .markdown {
        color: #2d3748 !important;
    }

    .example-row .markdown p {
        color: #2d3748 !important;
        margin: 0.5rem 0 !important;
        line-height: 1.5 !important;
    }

    .example-row .markdown strong {
        color: #1a202c !important;
        font-weight: 600 !important;
    }

    /* Example grid layout styles */
    .example-grid-row {
        margin: 1rem 0;
        gap: 1rem;
    }

    .example-item {
        background: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 15px;
        padding: 1rem;
        transition: all 0.3s ease;
        margin: 0.25rem;
        max-width: 250px;
        margin-left: auto;
        margin-right: auto;
    }

    .example-item:hover {
        border-color: #667eea;
        transform: translateY(-2px);
        box-shadow: 0 4px 20px rgba(102, 126, 234, 0.15);
    }

    .example-caption {
        margin: 0.5rem 0 !important;
        min-height: 2.8rem !important;
        display: flex !important;
        align-items: flex-start !important;
    }

    .example-caption p {
        color: #2d3748 !important;
        font-size: 0.9rem !important;
        line-height: 1.4 !important;
        margin: 0.5rem 0 !important;
    }

    /* Multi-video gallery styles */
    .additional-samples {
        margin-top: 1rem;
        gap: 0.5rem;
    }

    .additional-samples .gradio-video {
        border-radius: 10px;
        overflow: hidden;
    }

    /* Video gallery responsive layout */
    .video-gallery {
        display: grid;
        gap: 1rem;
        margin-top: 1rem;
    }

    .video-gallery.single {
        grid-template-columns: 1fr;
    }

    .video-gallery.dual {
        grid-template-columns: 1fr 1fr;
    }

    .video-gallery.multi {
        grid-template-columns: repeat(2, 1fr);
        grid-template-rows: auto auto auto;
    }

    .footer-text {
        color: #718096 !important;
        text-align: center;
        padding: 2rem;
        font-size: 0.9rem;
    }

    /* Video component styling for consistent size */
    .input-section video,
    .output-section video,
    .example-row video {
        width: 100% !important;
        height: 300px !important;
        object-fit: contain !important;
        border-radius: 10px !important;
        background-color: #000 !important;
    }

    .example-row video {
        height: 150px !important;
    }

    /* Fix for additional samples video display */
    .additional-samples video {
        height: 150px !important;
        object-fit: contain !important;
        border-radius: 10px !important;
        background-color: #000 !important;
    }

    .additional-samples .gradio-video {
        border-radius: 10px !important;
        overflow: hidden !important;
        background-color: #000 !important;
    }

    .additional-samples .gradio-video > div {
        background-color: #000 !important;
        border-radius: 10px !important;
    }

    /* Video container styling */
    .input-section .video-container,
    .output-section .video-container,
    .example-row .video-container {
        background-color: #000 !important;
        border-radius: 10px !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
        overflow: hidden !important;
    }

    /* Ensure proper alignment */
    .example-row {
        display: flex !important;
        align-items: stretch !important;
    }

    .example-row > div {
        display: flex !important;
        flex-direction: column !important;
        justify-content: center !important;
    }

    /* Video wrapper for better control */
    .video-wrapper {
        position: relative !important;
        width: 100% !important;
        background: #000 !important;
        border-radius: 10px !important;
        overflow: hidden !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
    }
    """

    with gr.Blocks(css=css, title="HunyuanVideo-Foley") as app:

        # Header
        with gr.Column(elem_classes=["main-header"]):
            gr.HTML("""
                <h1>🎵 HunyuanVideo-Foley</h1>
                <p>Text-Video-to-Audio Synthesis: Generate realistic audio from video and text descriptions</p>
            """)

        # Quick start guide
        with gr.Column(elem_classes=["status-card"]):
            gr.Markdown("""
            ### 📋 Quick Start Guide
            **1.** Upload your video file\t**2.** Add an optional text description\t**3.** Choose the number of samples (1-6)\t**4.** Click Generate Audio

            💡 To get started quickly, load one of the prepared examples below by clicking its button.
            """, elem_classes=["usage-guide"])

        with gr.Row(elem_classes=["main-interface"]):

            with gr.Column(scale=1, elem_classes=["input-section"]):
                gr.Markdown("### 📹 Video Input")

                video_input = gr.Video(
                    label="Upload Video",
                    height=300
                )

                text_input = gr.Textbox(
                    label="🎯 Audio Description (English)",
                    placeholder="A person walks on frozen ice",
                    lines=3,
                )

                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.5,
                        step=0.1,
                        label="🎚️ CFG Scale",
                    )

                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ Steps",
                    )

                sample_nums = gr.Slider(
                    minimum=1,
                    maximum=6,
                    value=1,
                    step=1,
                    label="🎲 Sample Nums",
                )

                generate_btn = gr.Button(
                    "🎵 Generate Audio",
                    variant="primary",
                    elem_classes=["generate-btn"]
                )

            with gr.Column(scale=1, elem_classes=["output-section"]):
                gr.Markdown("### 🎥 Generated Results")

                with gr.Column():
                    video_output_1 = gr.Video(
                        label="Sample 1",
                        height=250,
                        visible=True
                    )

                    with gr.Row(elem_classes=["additional-samples"]):
                        with gr.Column(scale=1):
                            video_output_2 = gr.Video(
                                label="Sample 2",
                                height=150,
                                visible=False
                            )
                            video_output_3 = gr.Video(
                                label="Sample 3",
                                height=150,
                                visible=False
                            )
                        with gr.Column(scale=1):
                            video_output_4 = gr.Video(
                                label="Sample 4",
                                height=150,
                                visible=False
                            )
                            video_output_5 = gr.Video(
                                label="Sample 5",
                                height=150,
                                visible=False
                            )

                    video_output_6 = gr.Video(
                        label="Sample 6",
                        height=150,
                        visible=False
                    )

                result_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=2
                )

        with gr.Column(elem_classes=["examples-section"]):
            gr.Markdown("### 🌟 Examples")
            gr.Markdown("Click on any example to load it into the interface above")

            examples_data = [
                {
                    "caption": "A person walks on frozen ice",
                    "video_path": "examples/1_video.mp4",
                    "result_path": "examples/1_result.mp4"
                },
                {
                    "caption": "With a faint sound as their hands parted, the two embraced, a soft 'mm' escaping between them.",
                    "video_path": "examples/2_video.mp4",
                    "result_path": "examples/2_result.mp4"
                },
                {
                    "caption": "The sound of the number 3's bouncing footsteps is as light and clear as glass marbles hitting the ground. Each step carries a magical sound.",
                    "video_path": "examples/3_video.mp4",
                    "result_path": "examples/3_result.mp4"
                },
                {
                    "caption": "gentle gurgling of the stream's current, and music plays in the background which is a beautiful and serene piano solo with a hint of classical charm, evoking a sense of peace and serenity in people's hearts.",
                    "video_path": "examples/4_video.mp4",
                    "result_path": "examples/4_result.mp4"
                },
                {
                    "caption": "snow crunching under the snowboard's edge.",
                    "video_path": "examples/5_video.mp4",
                    "result_path": "examples/5_result.mp4"
                },
                {
                    "caption": "The crackling of the fire, the whooshing of the flames, and the occasional crisp popping of charred leaves filled the forest.",
                    "video_path": "examples/6_video.mp4",
                    "result_path": "examples/6_result.mp4"
                },
                {
                    "caption": "humming of the scooter engine accelerates slowly.",
                    "video_path": "examples/7_video.mp4",
                    "result_path": "examples/7_result.mp4"
                },
                {
                    "caption": "splash of water and loud thud as person hits the surface.",
                    "video_path": "examples/8_video.mp4",
                    "result_path": "examples/8_result.mp4"
                }
            ]

            # Lay the examples out in a 2x4 grid.
            example_buttons = []
            for row in range(2):
                with gr.Row(elem_classes=["example-grid-row"]):
                    for col in range(4):
                        idx = row * 4 + col
                        if idx < len(examples_data):
                            example = examples_data[idx]

                            with gr.Column(scale=1, elem_classes=["example-item"]):
                                if os.path.exists(example['video_path']):
                                    example_video = gr.Video(
                                        value=example['video_path'],
                                        label=f"Example {idx+1}",
                                        interactive=False,
                                        show_label=True,
                                        height=180
                                    )
                                else:
                                    example_video = gr.HTML(f"""
                                    <div style="background: #f0f0f0; padding: 15px; text-align: center; border-radius: 8px; height: 180px; display: flex; align-items: center; justify-content: center;">
                                        <div>
                                            <p style="color: #666; margin: 0; font-size: 12px;">📹 Video not found</p>
                                            <small style="color: #999; font-size: 10px;">{example['video_path']}</small>
                                        </div>
                                    </div>
                                    """)

                                caption = example['caption']
                                caption_preview = caption[:60] + "..." if len(caption) > 60 else caption
                                gr.Markdown(caption_preview, elem_classes=["example-caption"])

                                example_btn = gr.Button(
                                    f"Load Example {idx+1}",
                                    variant="secondary",
                                    size="sm"
                                )
                                example_buttons.append((example_btn, example))

        def process_inference(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            video_list, status_msg = infer_single_video(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )
            return update_video_outputs(video_list, status_msg)

        def update_visibility(sample_nums):
            # Show the first `sample_nums` output slots and hide the rest.
            sample_nums = int(sample_nums)
            return [
                gr.update(visible=True),
                gr.update(visible=sample_nums >= 2),
                gr.update(visible=sample_nums >= 3),
                gr.update(visible=sample_nums >= 4),
                gr.update(visible=sample_nums >= 5),
                gr.update(visible=sample_nums >= 6),
            ]

        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=[video_output_1, video_output_2, video_output_3, video_output_4, video_output_5, video_output_6]
        )
        generate_btn.click(
            fn=process_inference,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=[
                video_output_1,
                video_output_2,
                video_output_3,
                video_output_4,
                video_output_5,
                video_output_6,
                result_text
            ]
        )

        # Wire each example button; the factory closes over its own example dict.
        for btn, example in example_buttons:
            def create_example_handler(ex):
                def handler():
                    video_file = ex['video_path'] if os.path.exists(ex['video_path']) else None
                    result_video = ex['result_path'] if os.path.exists(ex['result_path']) else None

                    status_msg = f"✅ Loaded example with caption: {ex['caption'][:50]}..."
                    if not video_file:
                        status_msg += f"\n⚠️ Video file not found: {ex['video_path']}"
                    if not result_video:
                        status_msg += f"\n⚠️ Result video not found: {ex['result_path']}"

                    return video_file, ex['caption'], result_video, status_msg
                return handler

            btn.click(
                fn=create_example_handler(example),
                outputs=[video_input, text_input, video_output_1, result_text]
            )

        gr.HTML("""
        <div class="footer-text">
            <p>🚀 Powered by HunyuanVideo-Foley | Generate high-quality audio from video and text descriptions</p>
        </div>
        """)

    return app


def set_manual_seed(global_seed):
    """Seed Python, NumPy, and PyTorch RNGs for reproducible sampling."""
    random.seed(global_seed)
    np.random.seed(global_seed)
    torch.manual_seed(global_seed)
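
# Note: torch.manual_seed() also seeds all CUDA devices. For stricter
# reproducibility one could additionally enable deterministic cuDNN kernels
# (torch.backends.cudnn.deterministic = True), which this app does not do.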


if __name__ == "__main__":
    set_manual_seed(1)

    # Route loguru output to stdout.
    logger.remove()
    logger.add(lambda msg: print(msg, end=''), level="INFO")

    # Load the model at startup so the first request doesn't pay the cost.
    logger.info("Starting application and loading model...")
    model_load_result = auto_load_models()
    logger.info(model_load_result)

    app = create_gradio_interface()

    if "successfully" in model_load_result:
        logger.info("Application ready, model loaded")

    app.launch(
        server_name="0.0.0.0",
        share=False,
        debug=False,
        show_error=True
    )