File size: 3,641 Bytes
1d01e07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d34889d
 
 
 
 
 
 
1d01e07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d34889d
1d01e07
0628786
1d01e07
 
 
 
d34889d
 
1d01e07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d34889d
 
1d01e07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a4fc36
 
 
 
 
 
1d01e07
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
import PIL
import spaces
import torch
from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
from hi_diffusers.schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
from transformers import AutoTokenizer, LlamaForCausalLM

# Model identifiers handed to the `from_pretrained` loaders further down.
MODEL_PREFIX: str = "HiDream-ai"
LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Built from the prefix so the organisation name is spelled in one place only.
MODEL_PATH: str = f"{MODEL_PREFIX}/HiDream-I1-full"
# Generation settings read by the scheduler setup and by generate_image().
MODEL_CONFIGS = {
    "guidance_scale": 5.0,  # forwarded to pipe(...) in generate_image
    "num_inference_steps": 50,  # forwarded to pipe(...) in generate_image
    "shift": 3.0,  # passed as `shift=` when the scheduler is constructed
    "scheduler": FlowUniPCMultistepScheduler,  # scheduler class, instantiated at module load
}

# Labels offered by the resolution picker. generate_image() strips the spaces,
# splits on "x" and reads the two integers as height, then width.
RESOLUTION_OPTIONS: list[str] = [
    f"{height} x {width}"
    for height, width in (
        (1024, 1024),
        (768, 1360),
        (1360, 768),
        (880, 1168),
        (1168, 880),
        (1248, 832),
        (832, 1248),
    )
]


# ---------------------------------------------------------------------------
# Model loading. Everything in this section runs at import time and moves each
# component to "cuda" as soon as it is created.
# NOTE(review): the UI strings in this file say the Space crashes because of
# memory usage; eagerly placing every component on the GPU here is a likely
# contributor -- confirm before re-enabling generation.
# ---------------------------------------------------------------------------

# Llama tokenizer (slow implementation requested via use_fast=False).
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
# Llama model handed to the pipeline as `text_encoder_4`. It is configured to
# return hidden states and attentions.
# NOTE(review): presumably the pipeline consumes the hidden states as prompt
# embeddings; whether `output_attentions=True` is needed is unclear -- confirm.
text_encoder = LlamaForCausalLM.from_pretrained(
    LLAMA_MODEL_NAME,
    output_hidden_states=True,
    output_attentions=True,
    torch_dtype=torch.bfloat16,
).to("cuda")

# Image transformer weights, loaded from the "transformer" subfolder of MODEL_PATH.
transformer = HiDreamImageTransformer2DModel.from_pretrained(
    MODEL_PATH,
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Instantiate the scheduler class named in MODEL_CONFIGS with its configured shift.
scheduler = MODEL_CONFIGS["scheduler"](
    num_train_timesteps=1000,
    shift=MODEL_CONFIGS["shift"],
    use_dynamic_shifting=False,
)

# Assemble the pipeline around the scheduler, tokenizer and text encoder built above.
pipe = HiDreamImagePipeline.from_pretrained(
    MODEL_PATH,
    scheduler=scheduler,
    tokenizer_4=tokenizer,
    text_encoder_4=text_encoder,
    torch_dtype=torch.bfloat16,
).to("cuda", torch.bfloat16)

# Point the pipeline at the separately loaded transformer instance.
# NOTE(review): `from_pretrained` above is not given `transformer=`, so it may
# load its own copy before this assignment replaces it -- confirm.
pipe.transformer = transformer


@spaces.GPU(duration=120)
def generate_image(
    prompt: str, resolution: str, seed: int, progress=gr.Progress(track_tqdm=True)
) -> tuple[PIL.Image.Image, int]:
    """Click handler intended to render one image for ``prompt``.

    The handler is currently switched off: once the seed has been resolved it
    always raises ``gr.Error``, so every statement after the ``raise`` is
    unreachable and nothing is returned.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        resolution: Label of the form ``"<int> x <int>"``; the two numbers are
            read as height, then width.
        seed: Seed for the CUDA generator. ``-1`` requests a random seed drawn
            from ``[0, 1_000_000)``.
        progress: Gradio progress tracker (default built with
            ``track_tqdm=True``); the body never references it directly.

    Returns:
        ``(image, seed)`` -- the first image produced by the pipeline and the
        seed that was used. Only reachable once the ``raise`` is removed.

    Raises:
        gr.Error: Always, carrying the memory-usage notice for 10 seconds.
    """
    if seed == -1:
        # No generator is passed, so the draw comes from torch's default RNG.
        seed = torch.randint(0, 1_000_000, (1,)).item()

    # Generation is deliberately disabled; the notice below is all a caller gets.
    raise gr.Error(
        "ℹ️ This spaces currently crash because of the memory usage. Please help me fix 😅",
        duration=10,
    )

    # ---- Unreachable until the raise above is removed ----
    size_parts = resolution.replace(" ", "").split("x")
    height, width = (int(part) for part in size_parts)

    rng = torch.Generator("cuda")
    rng.manual_seed(seed)

    result = pipe(
        prompt=prompt,
        height=height,
        width=width,
        guidance_scale=MODEL_CONFIGS["guidance_scale"],
        num_inference_steps=MODEL_CONFIGS["num_inference_steps"],
        generator=rng,
    )
    image = result.images[0]

    torch.cuda.empty_cache()
    return image, seed


# Gradio UI
# One row with two columns: the first holds the inputs (prompt, resolution,
# seed), the button and the "Seed Used" readout; the second holds the image.
# Components are laid out in the order they are created, so statement order
# inside the `with` blocks matters.
with gr.Blocks(title="HiDream Image Generator Full") as demo:
    gr.Markdown("## 🌈 HiDream Image Generator Full")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="e.g. A futuristic city with floating cars at sunset",
                lines=3,
            )

            # Choices come from RESOLUTION_OPTIONS; the first entry is preselected.
            resolution = gr.Radio(
                choices=RESOLUTION_OPTIONS,
                value=RESOLUTION_OPTIONS[0],
                label="Resolution",
            )

            # -1 is the sentinel generate_image() checks to pick a random seed.
            seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
            # Original, enabled button -- kept for reference while generation is off:
            # generate_btn = gr.Button("Generate Image", variant="primary")
            # Current button: non-interactive, with the outage notice as its label.
            generate_btn = gr.Button(
                "This space currently crash because of the memory usage. Please help me fix 😅",
                variant="primary",
                interactive=False,
            )
            # Read-only field filled with the second value returned by generate_image().
            seed_used = gr.Number(label="Seed Used", interactive=False)

        with gr.Column():
            output_image = gr.Image(label="Generated Image", type="pil")

    # The handler is still wired up even though the button is disabled;
    # generate_image() itself currently always raises gr.Error.
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, resolution, seed],
        outputs=[output_image, seed_used],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()