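# NOTE(review): the stray lines "James Zhou", "[init]", "9867d34" look like
# version-control metadata (author / commit message / short hash) pasted into
# the file; they are kept here as a comment so that the module parses.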
"""Constants used throughout the HunyuanVideo-Foley project."""
from typing import Dict, List
# Model configuration: default audio sample rate, video frame rate and
# audio channel count.
DEFAULT_AUDIO_SAMPLE_RATE: int = 48_000
DEFAULT_VIDEO_FPS: int = 25
DEFAULT_AUDIO_CHANNELS: int = 2
# Video processing: lower and upper bound on video duration, in seconds.
# Both are floats; the upper bound is rendered as "15.0" when interpolated
# into text.
MIN_VIDEO_DURATION_SECONDS: float = 1.0
MAX_VIDEO_DURATION_SECONDS: float = 15.0
# Audio processing: latent dimension of the audio VAE and the latent-space
# frame rate.
AUDIO_VAE_LATENT_DIM: int = 128
# Frames per second in latent space.
AUDIO_FRAME_RATE: int = 75
# Visual features: frames-per-second value for each visual feature type,
# keyed by name.
FPS_VISUAL: Dict[str, int] = {
    "siglip2": 8,
    "synchformer": 25,
}
# Default locations of the pretrained model directory and the YAML config.
# Per the original note these can be overridden by environment variables;
# the override logic itself is not part of this section.
DEFAULT_MODEL_PATH: str = "./pretrained_models/"
DEFAULT_CONFIG_PATH: str = "configs/hunyuanvideo-foley-xxl.yaml"
# Inference parameters. Each default is grouped with its min/max bounds.
# The bounds are only declared here; nothing in this section enforces them.

# Guidance scale.
DEFAULT_GUIDANCE_SCALE: float = 4.5
MIN_GUIDANCE_SCALE: float = 1.0
MAX_GUIDANCE_SCALE: float = 10.0

# Number of inference steps.
DEFAULT_NUM_INFERENCE_STEPS: int = 50
MIN_INFERENCE_STEPS: int = 10
MAX_INFERENCE_STEPS: int = 100
# Text processing: maximum text length and the default negative prompt.
# NOTE(review): the unit of MAX_TEXT_LENGTH (characters vs. tokens) is not
# visible from this file; confirm at the call site.
MAX_TEXT_LENGTH: int = 100
DEFAULT_NEGATIVE_PROMPT: str = "noisy, harsh"
# File extensions accepted for video and audio files. Every entry is
# lowercase and includes the leading dot.
SUPPORTED_VIDEO_EXTENSIONS: List[str] = [
    ".mp4",
    ".avi",
    ".mov",
    ".mkv",
    ".webm",
]
SUPPORTED_AUDIO_EXTENSIONS: List[str] = [
    ".wav",
    ".mp3",
    ".flac",
    ".aac",
]
# Quality settings: maps a quality level name to a two-element argument
# list of the form ["-b:a", <bitrate>].
# NOTE(review): these look like ffmpeg audio-bitrate arguments; confirm
# where they are consumed.
AUDIO_QUALITY_SETTINGS: Dict[str, List[str]] = {
    "high": ["-b:a", "192k"],
    "medium": ["-b:a", "128k"],
    "low": ["-b:a", "96k"],
}
# Error messages, keyed by a short error identifier.
#
# - "invalid_video_format" is a plain string that still contains a literal
#   "{formats}" placeholder; it is not filled in here.
# - "video_too_long" is an f-string, so MAX_VIDEO_DURATION_SECONDS is
#   interpolated once, when this module is imported.
ERROR_MESSAGES: Dict[str, str] = {
    "model_not_loaded": "Model is not loaded. Please load the model first.",
    "invalid_video_format": "Unsupported video format. Supported formats: {formats}",
    "video_too_long": f"Video duration exceeds maximum of {MAX_VIDEO_DURATION_SECONDS} seconds",
    "ffmpeg_not_found": "ffmpeg not found. Please install ffmpeg: https://ffmpeg.org/download.html",
}