File size: 3,402 Bytes
18d2806
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ece20c
18d2806
4ece20c
18d2806
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.

# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.

import os
import random

import numpy as np
import torch
from diffusers import AutoPipelineForText2Image


def seed_everything(seed):
    """Seed the global RNGs of ``random``, NumPy and PyTorch with ``seed``.

    The seed is also exported, as a string, through the ``PL_GLOBAL_SEED``
    environment variable.
    """
    # Same order as before: Python, then NumPy, then PyTorch.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)
    os.environ["PL_GLOBAL_SEED"] = str(seed)


class HunyuanDiTPipeline:
    def __init__(
        self,
        model_path="Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled",
        device='cuda'
    ):
        self.device = device
        self.pipe = AutoPipelineForText2Image.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            enable_pag=True,
            pag_applied_layers=["blocks.(16|17|18|19)"]
        ).to(device)
        self.pos_txt = ",็™ฝ่‰ฒ่ƒŒๆ™ฏ,3D้ฃŽๆ ผ,ๆœ€ไฝณ่ดจ้‡"
        self.neg_txt = "ๆ–‡ๆœฌ,็‰นๅ†™,่ฃๅ‰ช,ๅ‡บๆก†,ๆœ€ๅทฎ่ดจ้‡,ไฝŽ่ดจ้‡,JPEGไผชๅฝฑ,PGLY,้‡ๅค,็—…ๆ€," \
                       "ๆฎ‹็ผบ,ๅคšไฝ™็š„ๆ‰‹ๆŒ‡,ๅ˜ๅผ‚็š„ๆ‰‹,็”ปๅพ—ไธๅฅฝ็š„ๆ‰‹,็”ปๅพ—ไธๅฅฝ็š„่„ธ,ๅ˜ๅผ‚,็•ธๅฝข,ๆจก็ณŠ,่„ฑๆฐด,็ณŸ็ณ•็š„่งฃๅ‰–ๅญฆ," \
                       "็ณŸ็ณ•็š„ๆฏ”ไพ‹,ๅคšไฝ™็š„่‚ขไฝ“,ๅ…‹้š†็š„่„ธ,ๆฏๅฎน,ๆถๅฟƒ็š„ๆฏ”ไพ‹,็•ธๅฝข็š„่‚ขไฝ“,็ผบๅคฑ็š„ๆ‰‹่‡‚,็ผบๅคฑ็š„่…ฟ," \
                       "้ขๅค–็š„ๆ‰‹่‡‚,้ขๅค–็š„่…ฟ,่žๅˆ็š„ๆ‰‹ๆŒ‡,ๆ‰‹ๆŒ‡ๅคชๅคš,้•ฟ่„–ๅญ"

    def compile(self):
        # accelarate hunyuan-dit transformer,first inference will cost long time
        torch.set_float32_matmul_precision('high')
        self.pipe.transformer = torch.compile(self.pipe.transformer, fullgraph=True)
        # self.pipe.vae.decode = torch.compile(self.pipe.vae.decode, fullgraph=True)
        generator = torch.Generator(device=self.pipe.device)  # infer once for hot-start
        out_img = self.pipe(
            prompt='็พŽๅฐ‘ๅฅณๆˆ˜ๅฃซ',
            negative_prompt='ๆจก็ณŠ',
            num_inference_steps=25,
            pag_scale=1.3,
            width=1024,
            height=1024,
            generator=generator,
            return_dict=False
        )[0][0]

    @torch.no_grad()
    def __call__(self, prompt, seed=0):
        seed_everything(seed)
        generator = torch.Generator(device=self.pipe.device)
        generator = generator.manual_seed(int(seed))
        out_img = self.pipe(
            prompt=prompt[:60] + self.pos_txt,
            negative_prompt=self.neg_txt,
            num_inference_steps=25,
            pag_scale=1.3,
            width=1024,
            height=1024,
            generator=generator,
            return_dict=False
        )[0][0]
        return out_img