File size: 1,672 Bytes
9867d34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""Constants used throughout the HunyuanVideo-Foley project."""

from typing import Dict, List

# Model configuration defaults
DEFAULT_AUDIO_SAMPLE_RATE: int = 48_000
DEFAULT_VIDEO_FPS: int = 25
DEFAULT_AUDIO_CHANNELS: int = 2

# Video processing: accepted duration bounds, in seconds
MIN_VIDEO_DURATION_SECONDS: float = 1.0
MAX_VIDEO_DURATION_SECONDS: float = 15.0

# Audio processing
AUDIO_VAE_LATENT_DIM: int = 128
AUDIO_FRAME_RATE: int = 75  # frames per second in latent space

# Visual features: FPS value registered for each visual feature key
FPS_VISUAL: Dict[str, int] = dict(
    siglip2=8,
    synchformer=25,
)

# Default model locations (intended to be overridable by environment
# variables; no override logic appears in this section)
DEFAULT_MODEL_PATH: str = "./pretrained_models/"
DEFAULT_CONFIG_PATH: str = "configs/hunyuanvideo-foley-xxl.yaml"

# Inference parameters, listed as lower bound / default / upper bound
MIN_GUIDANCE_SCALE: float = 1.0
DEFAULT_GUIDANCE_SCALE: float = 4.5
MAX_GUIDANCE_SCALE: float = 10.0

MIN_INFERENCE_STEPS: int = 10
DEFAULT_NUM_INFERENCE_STEPS: int = 50
MAX_INFERENCE_STEPS: int = 100

# Text processing
MAX_TEXT_LENGTH: int = 100
DEFAULT_NEGATIVE_PROMPT: str = "noisy, harsh"

# Recognised file extensions (lower-case, leading dot included)
SUPPORTED_VIDEO_EXTENSIONS: List[str] = [
    ".mp4",
    ".avi",
    ".mov",
    ".mkv",
    ".webm",
]
SUPPORTED_AUDIO_EXTENSIONS: List[str] = [
    ".wav",
    ".mp3",
    ".flac",
    ".aac",
]

# Quality settings: each level maps to its own ["-b:a", <bitrate>] argument
# pair (presumably ffmpeg audio-bitrate options -- confirm against the caller)
AUDIO_QUALITY_SETTINGS: Dict[str, List[str]] = {
    level: ["-b:a", bitrate]
    for level, bitrate in (
        ("high", "192k"),
        ("medium", "128k"),
        ("low", "96k"),
    )
}

# Error messages, looked up by a short error key
ERROR_MESSAGES: Dict[str, str] = dict(
    model_not_loaded="Model is not loaded. Please load the model first.",
    # Plain (non-f) string: the "{formats}" placeholder is stored verbatim,
    # presumably for a later str.format(formats=...) call -- confirm at call site.
    invalid_video_format="Unsupported video format. Supported formats: {formats}",
    # f-string: the limit is interpolated once, when this module is imported.
    video_too_long=f"Video duration exceeds maximum of {MAX_VIDEO_DURATION_SECONDS} seconds",
    ffmpeg_not_found="ffmpeg not found. Please install ffmpeg: https://ffmpeg.org/download.html",
)