Plat committed
Commit 84f584e · 1 Parent(s): b102718

Files changed (8)
  1. .gitignore +5 -0
  2. .python-version +1 -0
  3. adapter.py +44 -0
  4. app.py +194 -129
  5. example.py +31 -0
  6. pyproject.toml +24 -0
  7. requirements.txt +3 -3
  8. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+__pycache__
+
+.env.*
+
+.DS_Store
.python-version ADDED
@@ -0,0 +1 @@
+3.11
adapter.py ADDED
@@ -0,0 +1,44 @@
+import os
+import yaml
+
+from huggingface_hub import hf_hub_download
+
+from src.models.sdxl.adapter.ip_adapter import (
+    SDXLModelWithIPAdapter,
+    SDXLModelWithIPAdapterConfig,
+)
+
+
+def _load_config(config_path: str, model_path: str, adapter_path: str):
+    with open(config_path, "r") as f:
+        config = yaml.safe_load(f)
+
+    config = SDXLModelWithIPAdapterConfig(**config)
+    config.checkpoint_path = model_path
+    config.adapter.checkpoint_weight = adapter_path
+
+    return config
+
+
+def load_ip_adapter_model(
+    model_path: str,
+    config_path: str,
+    adapter_path: str,
+):
+    config = _load_config(config_path, model_path, adapter_path)
+    model = SDXLModelWithIPAdapter.from_checkpoint(config)
+
+    return model
+
+
+def get_file_path(repo_id: str, filename: str, revision: str = "main") -> str:
+    if os.path.exists(filename):
+        return filename
+
+    file_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=filename,
+        revision=revision,
+    )
+
+    return file_path
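Note: get_file_path returns the filename unchanged when it already exists locally and otherwise downloads it from the given Hub repo, and load_ip_adapter_model builds an SDXLModelWithIPAdapter from a YAML config plus the base and adapter checkpoints. A minimal sketch of how the new app.py composes these helpers; the adapter repo id and filenames below are placeholders, not values from this commit (the real ones come from environment variables):

# Sketch only; app.py reads the real values from environment variables.
from adapter import get_file_path, load_ip_adapter_model

adapter_repo = "user/example-ip-adapter"  # hypothetical repo id
adapter_weights = get_file_path(adapter_repo, "ip_adapter.safetensors")  # hypothetical filename
adapter_config = get_file_path(adapter_repo, "config.yaml")  # hypothetical filename
base_weights = get_file_path(
    "p1atdev/animagine-xl-4.0-bnb-nf4",  # default base repo used by app.py
    "animagine-xl-4.0-opt.bnb_nf4.safetensors",
)

model = load_ip_adapter_model(
    model_path=base_weights,
    config_path=adapter_config,
    adapter_path=adapter_weights,
)
model.to("cuda:0")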
app.py CHANGED
@@ -1,154 +1,219 @@
-import gradio as gr
-import numpy as np
 import random

-# import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
 import torch

-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use

-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32

-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)

-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024


-# @spaces.GPU #[uncomment to use ZeroGPU]
-def infer(
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    progress=gr.Progress(track_tqdm=True),
-):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)

-    generator = torch.Generator().manual_seed(seed)

-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]

-    return image, seed


-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]

-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""

-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")

         with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )

-            run_button = gr.Button("Run", scale=0, variant="primary")

-        result = gr.Image(label="Result", show_label=False)

-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )

-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )

-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
                 )

-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
                 )

-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,  # Replace with defaults that work for your model
                 )

-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2,  # Replace with defaults that work for your model
                 )

-        gr.Examples(examples=examples, inputs=[prompt])
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs=[result, seed],
-    )

-if __name__ == "__main__":
     demo.launch()

+import spaces

+import os
 import random
+from PIL import Image

 import torch
+import gradio as gr
+import dotenv

+from adapter import load_ip_adapter_model, get_file_path
+from example import EXAMPLES

+dotenv.load_dotenv(".env.local")

+ADAPTER_REPO_ID = os.environ.get("ADAPTER_REPO_ID")
+ADAPTER_MODEL_PATH = os.environ.get("ADAPTER_MODEL_PATH")
+ADAPTER_CONFIG_PATH = os.environ.get("ADAPTER_CONFIG_PATH")

+assert ADAPTER_REPO_ID is not None
+assert ADAPTER_MODEL_PATH is not None
+assert ADAPTER_CONFIG_PATH is not None

+BASE_MODEL_REPO_ID = os.environ.get(
+    "BASE_MODEL_REPO_ID", "p1atdev/animagine-xl-4.0-bnb-nf4"
+)
+BASE_MODEL_PATH = os.environ.get(
+    "BASE_MODEL_PATH", "animagine-xl-4.0-opt.bnb_nf4.safetensors"
+)

+INITIAL_BATCH_SIZE = int(os.environ.get("INITIAL_BATCH_SIZE", 1))


+adapter_model_path = get_file_path(ADAPTER_REPO_ID, ADAPTER_MODEL_PATH)
+adapter_config_path = get_file_path(ADAPTER_REPO_ID, ADAPTER_CONFIG_PATH)

+base_model_path = get_file_path(BASE_MODEL_REPO_ID, BASE_MODEL_PATH)

+model = load_ip_adapter_model(
+    model_path=base_model_path,
+    config_path=adapter_config_path,
+    adapter_path=adapter_model_path,
+)
+model.to("cuda:0")


+@spaces.GPU
+def on_generate(
+    prompt: str,
+    negative_prompt: str,
+    image: Image.Image | None,
+    width: int,
+    height: int,
+    steps: int,
+    cfg_scale: float,
+    seed: int,
+    randomize_seed: bool = True,
+    num_images: int = 4,
+):
+    if image is not None:
+        image = image.convert("RGB")

+    if randomize_seed:
+        seed = random.randint(0, 2147483647)

+    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+        images = model.generate(
+            prompt=[prompt] * num_images,  # batch size 4
+            negative_prompt=negative_prompt,
+            reference_image=image,
+            num_inference_steps=steps,
+            cfg_scale=cfg_scale,
+            width=width,
+            height=height,
+            seed=seed,
+            do_offloading=False,
+            device="cuda:0",
+            max_token_length=225,
+            execution_dtype=torch.bfloat16,
+        )

+    torch.cuda.empty_cache()

+    return images, seed


+def main():
+    with gr.Blocks() as demo:
         with gr.Row():
+            with gr.Column():
+                prompt = gr.TextArea(
+                    label="Prompt",
+                    value="masterpiece, best quality",
+                    placeholder="masterpiece, best quality",
+                    interactive=True,
                 )
+                input_image = gr.Image(
+                    label="Reference Image",
+                    type="pil",
+                    height=600,
                 )

+                with gr.Accordion("Negative Prompt", open=False):
+                    negative_prompt = gr.TextArea(
+                        label="Negative Prompt",
+                        show_label=False,
+                        value="lowres, bad anatomy, bad hands, text, error, missing finger, extra digits, fewer digits, cropped, worst quality, low quality, low score, bad score, average score, signature, watermark, username, blurry",
+                        interactive=True,
+                    )

+                with gr.Row():
+                    width = gr.Slider(
+                        label="Width",
+                        minimum=256,
+                        maximum=2048,
+                        step=128,
+                        value=896,
+                        interactive=True,
+                    )
+                    height = gr.Slider(
+                        label="Height",
+                        minimum=256,
+                        maximum=2048,
+                        step=128,
+                        value=1152,
+                        interactive=True,
+                    )

+                with gr.Accordion("Advanced options", open=False):
+                    num_images = gr.Slider(
+                        label="Number of images to generate",
+                        minimum=1,
+                        maximum=8,
+                        step=1,
+                        value=INITIAL_BATCH_SIZE,
+                        interactive=True,
+                    )

+                    with gr.Row():
+                        seed = gr.Slider(
+                            label="Seed",
+                            minimum=0,
+                            maximum=2147483647,
+                            step=1,
+                            value=0,
+                        )
+                        randomize_seed = gr.Checkbox(
+                            label="Randomize seed",
+                            value=True,
+                            interactive=True,
+                            scale=1,
+                        )

+                    steps = gr.Slider(
+                        label="Inference steps",
+                        minimum=10,
+                        maximum=50,
+                        step=1,
+                        value=25,
+                        interactive=True,
+                    )

+                    cfg_scale = gr.Slider(
+                        label="CFG scale",
+                        minimum=3.0,
+                        maximum=8.0,
+                        step=0.5,
+                        value=5.0,
+                        interactive=True,
+                    )

+            with gr.Column():
+                generate_button = gr.Button(
+                    "Generate",
+                    variant="primary",
                 )
+                output_image = gr.Gallery(
+                    label="Generated images",
+                    type="pil",
+                    rows=2,
+                    height="768px",
+                    preview=True,
+                    show_label=True,
                 )

+                comment = gr.Markdown(
+                    label="Comment",
+                    visible=False,
+                )

+        gr.Examples(
+            examples=EXAMPLES,
+            inputs=[input_image, prompt, width, height, comment],
+            cache_examples=False,
+        )

+        gr.on(
+            triggers=[generate_button.click],
+            fn=on_generate,
+            inputs=[
+                prompt,
+                negative_prompt,
+                input_image,
+                width,
+                height,
+                steps,
+                cfg_scale,
+                seed,
+                randomize_seed,
+                num_images,
+            ],
+            outputs=[output_image, seed],
+        )

     demo.launch()


+if __name__ == "__main__":
+    main()
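The rewritten app.py loads .env.local with python-dotenv and asserts that the three ADAPTER_* variables are set before it builds the model; the base-model variables and INITIAL_BATCH_SIZE fall back to the defaults visible in the diff above. A sketch of the expected environment for a local run, with placeholder adapter values since the real repo id and filenames are not part of this commit:

# Sketch: export these (or put them in .env.local) before running app.py.
import os

os.environ["ADAPTER_REPO_ID"] = "user/example-ip-adapter"    # placeholder
os.environ["ADAPTER_MODEL_PATH"] = "ip_adapter.safetensors"  # placeholder
os.environ["ADAPTER_CONFIG_PATH"] = "config.yaml"            # placeholder

# Optional; app.py already defaults to these values.
os.environ.setdefault("BASE_MODEL_REPO_ID", "p1atdev/animagine-xl-4.0-bnb-nf4")
os.environ.setdefault("BASE_MODEL_PATH", "animagine-xl-4.0-opt.bnb_nf4.safetensors")
os.environ.setdefault("INITIAL_BATCH_SIZE", "1")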
example.py ADDED
@@ -0,0 +1,31 @@
+EXAMPLES = [
+    # image_path, prompt, width, height, comment
+    [
+        "./examples/marin.png",
+        "masterpiece, best quality",
+        896,
+        1152,
+        "Source: https://ninkoro.jp/chara/marin.html",
+    ],
+    [
+        "./examples/maid.jpg",
+        "1girl, solo, cowboy shot, hands up, heart hands, masterpiece, best quality",
+        896,
+        1152,
+        "Generated with AnimagineXL 4.0",
+    ],
+    [
+        "./examples/nai.png",
+        "masterpiece, best quality",
+        896,
+        1152,
+        "Generated with NovelAI Diffusion v4.5 Full",
+    ],
+    [
+        "./examples/ururu.png",
+        "masterpiece, best quality",
+        896,
+        1152,
+        "Source: https://www.irasutoya.com/2020/06/blog-post_169.html",
+    ],
+]
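Each EXAMPLES row is passed to gr.Examples in app.py in the order [input_image, prompt, width, height, comment], so adding a preset means appending another five-element list. A sketch with a hypothetical image path that is not part of this commit:

# Hypothetical extra preset; the image path and comment are placeholders.
EXAMPLES.append(
    [
        "./examples/my_reference.png",  # placeholder image path
        "masterpiece, best quality",
        896,
        1152,
        "Source: add attribution here",
    ]
)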
pyproject.toml ADDED
@@ -0,0 +1,24 @@
+[project]
+name = "animaginexl4-0-image-prompt"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "diffusers>=0.35.1",
+    "huggingface-hub>=0.34.4",
+    "safetensors>=0.6.2",
+    "spaces>=0.40.1",
+    "vision-ft",
+]
+
+[dependency-groups]
+dev = [
+    "gradio>=5.43.1",
+    "python-dotenv>=1.1.1",
+    "ruff>=0.12.10",
+    "ty>=0.0.1a19",
+]
+
+[tool.uv.sources]
+vision-ft = { path = "../vision-ft", editable = true }
requirements.txt CHANGED
@@ -1,6 +1,6 @@
+git+https://github.com/p1atdev/vision-ft@1175478
 accelerate
 diffusers
-invisible_watermark
-torch
 transformers
-xformers
+spaces>=0.40.1
+python-dotenv
uv.lock ADDED
The diff for this file is too large to render. See raw diff