# Adapted from https://github.com/luosiallen/latent-consistency-model
from __future__ import annotations

import os
import random
from omegaconf import OmegaConf

import numpy as np

try:
    # Imported for its side effects: enables XPU support on Intel Arc GPUs.
    import intel_extension_for_pytorch as ipex
except ImportError:
    pass

from .utils.lora import collapse_lora, monkeypatch_remove_lora
from .utils.lora_handler import LoraHandler
from .utils.common_utils import load_model_checkpoint
from .utils.utils import instantiate_from_config
from .scheduler.t2v_turbo_scheduler import T2VTurboScheduler
from .pipeline.t2v_turbo_vc2_pipeline import T2VTurboVC2Pipeline

import torch

DESCRIPTION = """# T2V-Turbo πŸš€
We provide T2V-Turbo (VC2) distilled from [VideoCrafter2](https://ailab-cvc.github.io/videocrafter2/) with the reward feedback from [HPSv2.1](https://github.com/tgxs002/HPSv2/tree/master) and [InternVid2 Stage 2 Model](https://huggingface.co/OpenGVLab/InternVideo2-Stage2_1B-224p-f4).

You can download the models from [here](https://huggingface.co/jiachenli-ucsb/T2V-Turbo-VC2). Check out our [Project page](https://t2v-turbo.github.io) πŸ˜„
"""
if torch.cuda.is_available():
    DESCRIPTION += "\n<p>Running on CUDA πŸ˜€</p>"
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    DESCRIPTION += "\n<p>Running on XPU πŸ€“</p>"
else:
    DESCRIPTION += "\n<p>Running on CPU πŸ₯Ά This demo does not work on CPU.</p>"

MAX_SEED = np.iinfo(np.int32).max
CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES") == "1"
USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE") == "1"

"""
Operation System Options:
    If you are using MacOS, please set the following (device="mps") ;
    If you are using Linux & Windows with Nvidia GPU, please set the device="cuda";
    If you are using Linux & Windows with Intel Arc GPU, please set the device="xpu";
"""
# device = "mps"    # MacOS
# device = "xpu"    # Intel Arc GPU
device = "cuda"  # Linux & Windows

"""
   DTYPE Options:
      To reduce GPU memory you can set "DTYPE=torch.float16",
      but image quality might be compromised
"""
DTYPE = (
    torch.float16
)  # torch.float16 works as well, but pictures seem to be a bit worse


def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed


class T2VTurboVC2Pipeline1:
    def __init__(self, config, merged, device, unet_dir, base_model_dir):
        # Keep the device on the instance so inference() does not depend on
        # the module-level global.
        self.device = device
        config = OmegaConf.create(config)
        model_config = config.pop("model", OmegaConf.create())
        pretrained_t2v = instantiate_from_config(model_config)

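        # time_cond_proj_dim enables the guidance-scale embedding used by
        # LCM-style consistency distillation.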
        unet_config = model_config["params"]["unet_config"]
        unet_config["params"]["time_cond_proj_dim"] = 256
        unet = instantiate_from_config(unet_config)

        if merged:
            # The checkpoint at base_model_dir already has the LoRA weights
            # collapsed into the UNet, so load it directly.
            pretrained_t2v.model.diffusion_model = unet
            pretrained_t2v = load_model_checkpoint(pretrained_t2v, base_model_dir)
        else:
            # Load the base VideoCrafter2 weights, then apply the T2V-Turbo
            # LoRA and collapse it into the UNet below.
            pretrained_t2v = load_model_checkpoint(pretrained_t2v, base_model_dir)

            unet.load_state_dict(
                pretrained_t2v.model.diffusion_model.state_dict(), strict=False
            )

            use_unet_lora = True
            lora_manager = LoraHandler(
                version="cloneofsimo",
                use_unet_lora=use_unet_lora,
                save_for_webui=True,
                unet_replace_modules=["UNetModel"],
            )
            lora_manager.add_lora_to_model(
                use_unet_lora,
                unet,
                lora_manager.unet_replace_modules,
                lora_path=unet_dir,
                dropout=0.1,
                r=64,
            )
            unet.eval()
            collapse_lora(unet, lora_manager.unet_replace_modules)
            monkeypatch_remove_lora(unet)

            pretrained_t2v.model.diffusion_model = unet

        scheduler = T2VTurboScheduler(
            linear_start=model_config["params"]["linear_start"],
            linear_end=model_config["params"]["linear_end"],
        )
        self.pipeline = T2VTurboVC2Pipeline(pretrained_t2v, scheduler, model_config)
        self.pipeline.to(device)

    def inference(
        self,
        prompt: str,
        height: int = 320,
        width: int = 512,
        seed: int = 0,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 4,
        num_frames: int = 16,
        fps: int = 16,
        randomize_seed: bool = False,
        param_dtype: str = "torch.float16",
    ):
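        # Resolve the seed (optionally randomizing it) and move the pipeline
        # to the requested dtype before sampling.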
        seed = randomize_seed_fn(seed, randomize_seed)
        torch.manual_seed(seed)
        self.pipeline.to(
            torch_device=self.device,
            torch_dtype=torch.float16 if param_dtype == "torch.float16" else torch.float32,
        )

        result = self.pipeline(
            prompt=prompt,
            height=height,
            width=width,
            frames=num_frames,
            fps=fps,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            num_videos_per_prompt=1,
        )

        return result
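

# Usage sketch (not part of the original module): a minimal example of driving
# the pipeline above. The config and checkpoint paths are assumptions; use the
# files shipped with the T2V-Turbo-VC2 release
# (https://huggingface.co/jiachenli-ucsb/T2V-Turbo-VC2). Because this module
# uses relative imports, run it with `python -m <package>.<module>` rather
# than as a standalone script.
if __name__ == "__main__":
    config = OmegaConf.load("configs/inference_t2v_512_v2.0.yaml")  # assumed path
    pipe = T2VTurboVC2Pipeline1(
        config=config,
        merged=True,  # checkpoint with the LoRA weights already collapsed in
        device=device,
        unet_dir="checkpoints/unet_lora.pt",  # assumed path; unused when merged=True
        base_model_dir="checkpoints/t2v_turbo_vc2.pt",  # assumed path
    )
    videos = pipe.inference(
        prompt="A corgi running on the beach at sunset",
        num_inference_steps=4,
        randomize_seed=True,
    )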