bweigel LuChengTHU committed on
Commit
9e36867
·
0 Parent(s):

Duplicate from LuChengTHU/dpmsolver_sdm

Browse files

Co-authored-by: LuChengTHU <[email protected]>

Files changed (5) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +276 -0
  4. nsfw.png +0 -0
  5. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Dpmsolver Sdm
3
+ emoji: 💻
4
+ colorFrom: gray
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.9
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: LuChengTHU/dpmsolver_sdm
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import AutoencoderKL, UNet2DConditionModel, StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, DPMSolverMultistepScheduler
2
+ import gradio as gr
3
+ import torch
4
+ from PIL import Image
5
+ import os
6
+
7
# Multistep DPM-Solver++ scheduler instance; the same object is passed as
# `scheduler=` to every pipeline constructed in this file.
scheduler = DPMSolverMultistepScheduler(
    # Noise-schedule parameters.
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
    trained_betas=None,
    # Solver parameters.
    algorithm_type="dpmsolver++",
    solver_type="midpoint",
    lower_order_final=True,
    predict_epsilon=True,
    thresholding=False,
)
19
+
20
class Model:
    """One selectable checkpoint together with the pipelines built for it.

    Attributes:
        name: Label used to identify the model (shown in the model dropdown).
        path: Repository id passed to ``from_pretrained``.
        prefix: Text prepended to every user prompt for this model.
        pipe_t2i: Text-to-image pipeline; ``None`` until one is assigned.
        pipe_i2i: Image-to-image pipeline; ``None`` until one is assigned.
    """

    def __init__(self, name, path, prefix):
        self.name, self.path, self.prefix = name, path, prefix
        # Pipelines are attached after construction, once loading succeeds.
        self.pipe_t2i = self.pipe_i2i = None
27
+
28
# Checkpoints offered by the demo; the first entry is the start-up default.
models = [
    Model(
        "Stable-Diffusion-v1.4",
        "CompVis/stable-diffusion-v1-4",
        "The 1.4 version of official stable-diffusion",
    ),
    Model("Waifu", "hakurei/waifu-diffusion", "anime style"),
]

# Hub access token taken from the environment (None when the variable is unset).
auth_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")

# Mutable module state describing which model/mode the active pipeline serves.
current_model = models[0]
current_model_path = current_model.path
last_mode = "txt2img"

print(f"Is CUDA available: {torch.cuda.is_available()}")
40
+
41
# Load every model once at start-up: float16 on CUDA, library-default
# precision on CPU. The two cases differ only in these settings, so they
# share a single code path.
use_cuda = torch.cuda.is_available()
target_device = "cuda" if use_cuda else "cpu"
dtype_kwargs = {"torch_dtype": torch.float16} if use_cuda else {}

# One VAE, taken from the default model, is shared by all pipelines.
vae = AutoencoderKL.from_pretrained(
    current_model.path,
    subfolder="vae",
    use_auth_token=auth_token,
    **dtype_kwargs,
)

# Iterate over a snapshot: removing entries from `models` while looping over
# the list itself would skip the element following each removed one.
for model in list(models):
    try:
        unet = UNet2DConditionModel.from_pretrained(
            model.path,
            subfolder="unet",
            use_auth_token=auth_token,
            **dtype_kwargs,
        )
        model.pipe_t2i = StableDiffusionPipeline.from_pretrained(
            model.path,
            unet=unet,
            vae=vae,
            scheduler=scheduler,
            use_auth_token=auth_token,
            **dtype_kwargs,
        )
        model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
            model.path,
            unet=unet,
            vae=vae,
            scheduler=scheduler,
            use_auth_token=auth_token,
            **dtype_kwargs,
        )
    except Exception as err:
        # Best effort: a model that cannot be loaded is dropped from the
        # list, but the reason is reported instead of silently swallowed.
        print(f"Skipping model {model.name!r}: {err}")
        models.remove(model)

if not models:
    raise RuntimeError(
        "No model could be loaded; check network access and HUGGING_FACE_HUB_TOKEN."
    )

# Keep the module state consistent if the default model had to be dropped.
if current_model not in models:
    current_model = models[0]
    current_model_path = current_model.path

pipe = models[0].pipe_t2i
pipe = pipe.to(target_device)

device = "GPU 🔥" if use_cuda else "CPU 🥶"
66
+
67
def inference(model_name, prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt=""):
    """Run one generation request and return the resulting image.

    Selects the model whose ``name`` equals ``model_name`` (keeping the
    currently selected model when no entry matches), builds an optional seeded
    generator and dispatches to image-to-image when ``img`` is given, otherwise
    to text-to-image.

    Args:
        model_name: Name of the entry in ``models`` to use.
        prompt: User prompt text.
        guidance: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        width: Requested output width.
        height: Requested output height.
        seed: Random seed; ``0`` means "do not seed" (no generator is passed).
        img: Optional init image; when not ``None`` image-to-image is used.
        strength: Transformation strength for image-to-image.
        neg_prompt: Negative prompt text.

    Returns:
        The value returned by ``img_to_img`` or ``txt_to_img``.
    """
    global current_model

    # Fall back to the current model for an unknown name so that `model_path`
    # is always bound.
    current_model = next((m for m in models if m.name == model_name), current_model)
    model_path = current_model.path

    # UI numbers may arrive as floats (``steps`` is already cast with int()
    # further down); ``manual_seed`` only accepts an int, so normalise here.
    seed = int(seed)
    width, height = int(width), int(height)

    generator = None
    if seed != 0:
        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(generator_device).manual_seed(seed)

    if img is not None:
        return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator)
    return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator)
81
+
82
def txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator=None):
    """Generate an image from a text prompt with the currently selected model.

    When the requested model path differs from the active one, or the previous
    call was not text-to-image, the module-level ``pipe`` is switched to the
    current model's text-to-image pipeline before running.

    Args:
        model_path: Path of the model that should be active.
        prompt: User prompt; the current model's prefix is prepended to it.
        neg_prompt: Negative prompt text.
        guidance: Guidance scale.
        steps: Number of inference steps (cast to ``int``).
        width: Output width.
        height: Output height.
        generator: Optional seeded ``torch.Generator``.

    Returns:
        The value returned by ``replace_nsfw_images`` for the pipeline output.
    """
    global last_mode, pipe, current_model_path

    same_model = model_path == current_model_path
    if not (same_model and last_mode == "txt2img"):
        current_model_path = model_path

        # Move the outgoing pipeline to the CPU before activating the new one.
        pipe.to("cpu")
        pipe = current_model.pipe_t2i
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")
        last_mode = "txt2img"

    full_prompt = current_model.prefix + prompt
    output = pipe(
        full_prompt,
        negative_prompt=neg_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
    )
    return replace_nsfw_images(output)
109
+
110
def img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator=None):
    """Generate an image from a prompt plus an init image.

    When the requested model path differs from the active one, or the previous
    call was not image-to-image, the module-level ``pipe`` is switched to the
    current model's image-to-image pipeline before running. The init image is
    scaled (aspect ratio preserved) by the smaller of the height and width
    ratios, so it fits inside ``width`` x ``height``.

    Args:
        model_path: Path of the model that should be active.
        prompt: User prompt; the current model's prefix is prepended to it.
        neg_prompt: Negative prompt text.
        img: Init image exposing ``width``, ``height`` and ``resize``.
        strength: Transformation strength.
        guidance: Guidance scale.
        steps: Number of inference steps (cast to ``int``).
        width: Target width.
        height: Target height.
        generator: Optional seeded ``torch.Generator``.

    Returns:
        The value returned by ``replace_nsfw_images`` for the pipeline output.
    """
    global last_mode, pipe, current_model_path

    same_model = model_path == current_model_path
    if not (same_model and last_mode == "img2img"):
        current_model_path = model_path

        # Move the outgoing pipeline to the CPU before activating the new one.
        pipe.to("cpu")
        pipe = current_model.pipe_i2i
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")
        last_mode = "img2img"

    full_prompt = current_model.prefix + prompt

    # Uniform scale factor: the tighter of the two axis ratios.
    scale = min(height / img.height, width / img.width)
    scaled_size = (int(img.width * scale), int(img.height * scale))
    init = img.resize(scaled_size, Image.LANCZOS)

    output = pipe(
        full_prompt,
        negative_prompt=neg_prompt,
        init_image=init,
        num_inference_steps=int(steps),
        strength=strength,
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
    )
    return replace_nsfw_images(output)
141
+
142
def replace_nsfw_images(results):
    """Swap flagged images for the placeholder and return the first image.

    Args:
        results: Pipeline output exposing ``images`` (a mutable sequence) and
            ``nsfw_content_detected`` (a sequence of flags, or ``None`` when
            no safety check was performed).

    Returns:
        ``results.images[0]`` after every flagged entry has been replaced
        in place by the image loaded from ``nsfw.png``.
    """
    # A missing flag list means nothing was checked, so nothing is replaced
    # (indexing ``None`` would otherwise raise TypeError).
    flags = results.nsfw_content_detected or []
    for index, flagged in zip(range(len(results.images)), flags):
        if flagged:
            results.images[index] = Image.open("nsfw.png")
    return results.images[0]
147
+
148
# Custom styles for the demo page, passed to gr.Blocks(css=...). That
# parameter expects plain CSS, so the rules are not wrapped in <style> tags:
# a literal "<style>" in front of the first selector makes that rule invalid
# and it gets discarded by the browser.
css = """
.finetuned-diffusion-div {
    text-align: center;
    max-width: 700px;
    margin: 0 auto;
    font-family: 'IBM Plex Sans', sans-serif;
}
.finetuned-diffusion-div div {
    display: inline-flex;
    align-items: center;
    gap: 0.8rem;
    font-size: 1.75rem;
}
.finetuned-diffusion-div div h1 {
    font-weight: 900;
    margin-top: 15px;
    margin-bottom: 15px;
    text-align: center;
}
.finetuned-diffusion-div p {
    margin-bottom: 10px;
    font-size: 94%;
}
.finetuned-diffusion-div p a {
    text-decoration: underline;
}
.tabs {
    margin-top: 0px;
    margin-bottom: 0px;
}
#gallery {
    min-height: 20rem;
}
.container {
    max-width: 1000px;
    margin: auto;
    padding-top: 1.5rem;
}
"""
189
+ with gr.Blocks(css=css) as demo:
190
+ gr.HTML(
191
+ f"""
192
+ <div class="finetuned-diffusion-div">
193
+ <div>
194
+ <h1>Stable-Diffusion with DPM-Solver (fastest sampler for diffusion models) </h1>
195
+ </div>
196
+ <br>
197
+ <p>
198
+ ❤️ Acknowledgement: Hardware resources of this demo are supported by HuggingFace 🤗 . Many thanks for the help!
199
+ </p>
200
+ <br>
201
+ <p>
202
+ This is a demo of sampling by DPM-Solver with two variants of Stable Diffusion models, including <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4">Stable-Diffusion-v1.4</a> and <a href="https://huggingface.co/hakurei/waifu-diffusion">Waifu</a>.
203
+ </p>
204
+ <br>
205
+ <p>
206
+ <a href="https://github.com/LuChengTHU/dpm-solver">DPM-Solver</a> (Neurips 2022 Oral) is a fast high-order solver customized for diffusion ODEs, which can generate high-quality samples by diffusion models within only 10-25 steps. DPM-Solver has an analytical formulation and is very easy to use for all types of Gaussian diffusion models, and includes <a href="https://arxiv.org/abs/2010.02502">DDIM</a> as a first-order special case.
207
+ </p>
208
+ <p>
209
+ We use <a href="https://github.com/huggingface/diffusers">Diffusers</a> 🧨 to implement this demo, which currently supports the multistep DPM-Solver scheduler. For more details of DPM-Solver with Diffusers, check <a href="https://github.com/huggingface/diffusers/pull/1132">this pull request</a>.
210
+ </p>
211
+ <br>
212
+ <p>
213
+ Currently, the default sampler of stable-diffusion is <a href="https://arxiv.org/abs/2202.09778">PNDM</a>, which needs 50 steps to generate high-quality samples. However, DPM-Solver can generate high-quality samples within only <span style="font-weight: bold;">20-25</span> steps, and for some samples even within <span style="font-weight: bold;">10-15</span> steps.
214
+ </p>
215
+ <br>
216
+ <p>
217
+ Running on <b>{device}</b>
218
+ </p>
219
+ </div>
220
+ """
221
+ )
222
+
223
+ with gr.Row():
224
+
225
+ with gr.Column(scale=55):
226
+ with gr.Group():
227
+ model_name = gr.Dropdown(label="Model", choices=[m.name for m in models], value=current_model.name)
228
+ with gr.Row():
229
+ prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder="Enter prompt. Style applied automatically").style(container=False)
230
+ generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))
231
+
232
+
233
+ image_out = gr.Image(height=512)
234
+ # gallery = gr.Gallery(
235
+ # label="Generated images", show_label=False, elem_id="gallery"
236
+ # ).style(grid=[1], height="auto")
237
+
238
+ with gr.Column(scale=45):
239
+ with gr.Tab("Options"):
240
+ with gr.Group():
241
+ neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
242
+
243
+ # n_images = gr.Slider(label="Images", value=1, minimum=1, maximum=4, step=1)
244
+
245
+ with gr.Row():
246
+ guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
247
+ steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=100, step=1)
248
+
249
+ with gr.Row():
250
+ width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
251
+ height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)
252
+
253
+ seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
254
+
255
+ with gr.Tab("Image to image"):
256
+ with gr.Group():
257
+ image = gr.Image(label="Image", height=256, tool="editor", type="pil")
258
+ strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)
259
+
260
+ # model_name.change(lambda x: gr.update(visible = x == models[0].name), inputs=model_name, outputs=custom_model_group)
261
+
262
+ inputs = [model_name, prompt, guidance, steps, width, height, seed, image, strength, neg_prompt]
263
+ prompt.submit(inference, inputs=inputs, outputs=image_out)
264
+
265
+ generate.click(inference, inputs=inputs, outputs=image_out)
266
+
267
+
268
+ gr.Markdown('''
269
+ Stable-diffusion Models by [CompVis](https://huggingface.co/CompVis) and [stabilityai](https://huggingface.co/stabilityai), Waifu-diffusion models by [@hakurei](https://huggingface.co/hakurei). Most of the code of this demo are copied from [@anzorq's fintuned-diffusion](https://huggingface.co/spaces/anzorq/finetuned_diffusion/tree/main) ❤️<br>
270
+ Space by [Cheng Lu](https://github.com/LuChengTHU). [![Twitter Follow](https://img.shields.io/twitter/follow/ChengLu05671218?label=%40ChengLu&style=social)](https://twitter.com/ChengLu05671218)
271
+
272
+ ![visitors](https://visitor-badge.glitch.me/badge?page_id=LuChengTHU.dpmsolver_sdm)
273
+ ''')
274
+
275
+ demo.queue(concurrency_count=1)
276
+ demo.launch(debug=False, share=False)
nsfw.png ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ git+https://github.com/huggingface/diffusers.git
3
+ transformers
4
+ scipy
5
+ ftfy
6
+ accelerate