|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import attrs |
|
|
|
from cosmos_transfer1.diffusion.conditioner import ( |
|
FrameRepeatAttr, |
|
VideoConditionerWithCtrl, |
|
ViewConditionedVideoConditionerWithCtrl, |
|
) |
|
from cosmos_transfer1.diffusion.config.base.conditioner import ( |
|
FPSConfig, |
|
ImageSizeConfig, |
|
NumFramesConfig, |
|
PaddingMaskConfig, |
|
TextConfig, |
|
VideoCondBoolConfig, |
|
) |
|
from cosmos_transfer1.diffusion.datasets.augmentors.control_input import ( |
|
AddControlInput, |
|
AddControlInputDepth, |
|
AddControlInputEdge, |
|
AddControlInputHDMAP, |
|
AddControlInputKeypoint, |
|
AddControlInputLIDAR, |
|
AddControlInputSeg, |
|
AddControlInputUpscale, |
|
) |
|
from cosmos_transfer1.utils.lazy_config import LazyCall as L |
|
from cosmos_transfer1.utils.lazy_config import LazyDict |
|
|
|
# Names of the control-input hint keys; every entry carries the
# "control_input_" prefix and also appears as a key of CTRL_HINT_KEYS_COMB.
# The order is kept exactly as originally written, since consumers of this
# list are not visible here and may depend on it.
CTRL_HINT_KEYS = [
    "control_input_edge",
    "control_input_vis",
    "control_input_depth",
    "control_input_seg",
    "control_input_keypoint",
    "control_input_upscale",
    "control_input_hdmap",
    "control_input_lidar",
]
|
|
|
# Maps each control-input hint key to a list of augmentor classes (imported
# from cosmos_transfer1.diffusion.datasets.augmentors.control_input).
# Every list currently holds exactly one class; the values are the classes
# themselves, not instances.
# NOTE(review): "COMB" presumably stands for "combination" (several augmentors
# per key) -- confirm against the code that consumes this mapping.
CTRL_HINT_KEYS_COMB = {
    "control_input_vis": [AddControlInput],
    "control_input_edge": [AddControlInputEdge],
    "control_input_depth": [AddControlInputDepth],
    "control_input_seg": [AddControlInputSeg],
    "control_input_keypoint": [AddControlInputKeypoint],
    "control_input_upscale": [AddControlInputUpscale],
    "control_input_hdmap": [AddControlInputHDMAP],
    "control_input_lidar": [AddControlInputLIDAR],
}
|
|
|
|
|
# A second, smaller list of control-input hint keys. Only
# "control_input_upscale" is shared with CTRL_HINT_KEYS; the canny/blur keys
# appear nowhere else in this file.
# NOTE(review): the meaning of the "SS" prefix is not established here --
# confirm with the code that reads this list.
SS_CTRL_HINT_KEYS = [
    "control_input_canny",
    "control_input_canny_blur",
    "control_input_blur",
    "control_input_upscale",
]
|
|
|
|
|
# Maps a short control name to a longer name. Only "seg" differs from its
# value ("segmentation"); "depth" and "keypoint" map to themselves.
# NOTE(review): presumably the values are the keys under which the matching
# augmentation data is stored -- verify against the caller.
CTRL_AUG_KEYS = {
    "depth": "depth",
    "seg": "segmentation",
    "keypoint": "keypoint",
}
|
|
|
|
|
@attrs.define(slots=False)
class FrameRepeatConfig:
    """Config record pairing a lazily-built ``FrameRepeatAttr`` with its settings.

    Declared with ``slots=False`` so instances keep a regular ``__dict__``.
    """

    # Lazy call describing how to construct the FrameRepeatAttr object.
    # NOTE(review): this default is evaluated once at class-definition time, so
    # instances that do not override `obj` share the same LazyDict object.
    obj: LazyDict = L(FrameRepeatAttr)()
    # NOTE(review): presumably the probability of dropping this condition;
    # 0.0 would then mean "never drop" -- confirm against the conditioner.
    dropout_rate: float = 0.0
    # NOTE(review): presumably the key under which the input is looked up --
    # confirm against the conditioner.
    input_key: str = "frame_repeat"
|
|
|
|
|
# Lazy call for VideoConditionerWithCtrl that supplies only the text
# sub-config; this statement builds the LazyCall description rather than
# calling VideoConditionerWithCtrl directly.
BaseVideoConditionerWithCtrlConfig: LazyDict = L(VideoConditionerWithCtrl)(
    text=TextConfig(),
)
|
|
|
# Lazy call for VideoConditionerWithCtrl that, in addition to text, supplies
# fps, num_frames, image_size, padding_mask and video_cond_bool sub-configs,
# each created with its default constructor arguments.
VideoConditionerFpsSizePaddingWithCtrlConfig: LazyDict = L(VideoConditionerWithCtrl)(
    text=TextConfig(),
    fps=FPSConfig(),
    num_frames=NumFramesConfig(),
    image_size=ImageSizeConfig(),
    padding_mask=PaddingMaskConfig(),
    video_cond_bool=VideoCondBoolConfig(),
)
|
|
|
# Lazy call for ViewConditionedVideoConditionerWithCtrl. It takes the same
# sub-configs as VideoConditionerFpsSizePaddingWithCtrlConfig plus a
# frame_repeat entry built from FrameRepeatConfig's defaults.
ViewConditionedVideoConditionerFpsSizePaddingWithCtrlConfig: LazyDict = L(ViewConditionedVideoConditionerWithCtrl)(
    text=TextConfig(),
    fps=FPSConfig(),
    num_frames=NumFramesConfig(),
    image_size=ImageSizeConfig(),
    padding_mask=PaddingMaskConfig(),
    video_cond_bool=VideoCondBoolConfig(),
    frame_repeat=FrameRepeatConfig(),
)
|
|