# Spaces: Runtime error
# File size: 1,672 Bytes
# 9867d34
"""Constants used throughout the HunyuanVideo-Foley project."""
from typing import Dict, List
# Model configuration: default audio/video settings.
DEFAULT_AUDIO_SAMPLE_RATE: int = 48_000  # presumably in Hz -- confirm
DEFAULT_VIDEO_FPS: int = 25  # frames per second
DEFAULT_AUDIO_CHANNELS: int = 2
# Video processing: lower and upper duration limits, both in seconds.
MIN_VIDEO_DURATION_SECONDS: float = 1.0
MAX_VIDEO_DURATION_SECONDS: float = 15.0
# Audio processing: latent-space constants.
AUDIO_VAE_LATENT_DIM: int = 128
# Number of latent-space frames per second.
AUDIO_FRAME_RATE: int = 75
# Visual features: frame rate (FPS) per entry.
# NOTE(review): the keys look like visual feature-extractor names -- confirm
# against the code that reads this mapping.
FPS_VISUAL: Dict[str, int] = {
    "siglip2": 8,
    "synchformer": 25,
}
# Default model and config locations. Environment variables may override
# these; the override logic is not part of this section.
DEFAULT_MODEL_PATH: str = "./pretrained_models/"
DEFAULT_CONFIG_PATH: str = "configs/hunyuanvideo-foley-xxl.yaml"
# Inference parameters, grouped as default / minimum / maximum.

# Guidance scale.
DEFAULT_GUIDANCE_SCALE: float = 4.5
MIN_GUIDANCE_SCALE: float = 1.0
MAX_GUIDANCE_SCALE: float = 10.0

# Number of inference steps.
DEFAULT_NUM_INFERENCE_STEPS: int = 50
MIN_INFERENCE_STEPS: int = 10
MAX_INFERENCE_STEPS: int = 100
# Text processing: prompt-related defaults.
# NOTE(review): the unit of MAX_TEXT_LENGTH (characters vs. tokens) is not
# visible here -- confirm against the caller.
MAX_TEXT_LENGTH: int = 100
DEFAULT_NEGATIVE_PROMPT: str = "noisy, harsh"
# Supported file extensions: lowercase, each with its leading dot.
SUPPORTED_VIDEO_EXTENSIONS: List[str] = [
    ".mp4",
    ".avi",
    ".mov",
    ".mkv",
    ".webm",
]
SUPPORTED_AUDIO_EXTENSIONS: List[str] = [
    ".wav",
    ".mp3",
    ".flac",
    ".aac",
]
# Quality settings: quality-level name -> list of argument strings.
# NOTE(review): "-b:a <rate>" looks like an ffmpeg audio-bitrate option pair
# -- confirm against the code that consumes these lists.
AUDIO_QUALITY_SETTINGS: Dict[str, List[str]] = {
    "high": ["-b:a", "192k"],
    "medium": ["-b:a", "128k"],
    "low": ["-b:a", "96k"],
}
# Error messages: lookup table from an error key to its message text.
ERROR_MESSAGES: Dict[str, str] = {
    "model_not_loaded": "Model is not loaded. Please load the model first.",
    # Plain string (not an f-string): the "{formats}" placeholder is left
    # intact here and has to be filled in later, e.g. with
    # ERROR_MESSAGES["invalid_video_format"].format(formats=...).
    "invalid_video_format": "Unsupported video format. Supported formats: {formats}",
    # f-string: the limit is interpolated once, when this module is imported,
    # so later changes to MAX_VIDEO_DURATION_SECONDS do not alter this text.
    "video_too_long": f"Video duration exceeds maximum of {MAX_VIDEO_DURATION_SECONDS} seconds",
    "ffmpeg_not_found": "ffmpeg not found. Please install ffmpeg: https://ffmpeg.org/download.html",
}