NikhilJoson committed
Commit 2f84586 · verified · 1 Parent(s): 53aed50

Update app.py

Files changed (1)
  1. app.py +107 -141
app.py CHANGED
@@ -1,48 +1,69 @@
  import gradio as gr
- from PIL import Image
  import os
  import random
  import spaces
-
  from OmniGen import OmniGenPipeline

- pipe = OmniGenPipeline.from_pretrained(
-     "Shitao/OmniGen-v1"
- )

  @spaces.GPU(duration=180)
- def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
-                    use_input_image_size_as_output, max_input_image_size, randomize_seed):
      input_images = [img1, img2, img3]
      # Delete None
      input_images = [img for img in input_images if img is not None]
      if len(input_images) == 0:
          input_images = None

      if randomize_seed:
          seed = random.randint(0, 10000000)

-     output = pipe(
-         prompt=text,
-         input_images=input_images,
-         height=height,
-         width=width,
-         guidance_scale=guidance_scale,
-         img_guidance_scale=img_guidance_scale,
-         num_inference_steps=inference_steps,
-         separate_cfg_infer=separate_cfg_infer,
-         use_kv_cache=True,
-         offload_kv_cache=True,
-         offload_model=offload_model,
-         use_input_image_size_as_output=use_input_image_size_as_output,
-         seed=seed,
-         max_input_image_size=max_input_image_size,
-     )
      img = output[0]
      return img

-
-
  def get_example():
      case = [
          [
@@ -245,164 +266,109 @@ def get_example():
  return case

  def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, seed, max_input_image_size, randomize_seed, use_input_image_size_as_output):
-     # Set default values inside the function
      inference_steps = 50
      separate_cfg_infer = True
      offload_model = False

-     return generate_image(
-         text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale,
-         inference_steps, seed, separate_cfg_infer, offload_model,
-         use_input_image_size_as_output, max_input_image_size, randomize_seed
-     )
 
  description = """
- OmniGen is a unified image generation model that you can use to perform various tasks, including but not limited to text-to-image generation, subject-driven generation, identity-preserving generation, and image-conditioned generation.
- For multi-modal-to-image generation, you should pass a string as `prompt` and a list of image paths as `input_images`. The placeholder in the prompt should be in the format `<img><|image_*|></img>` (for the first image the placeholder is <img><|image_1|></img>; for the second image the placeholder is <img><|image_2|></img>).
- For example, use an image of a woman to generate a new image:
- prompt = "A woman holds a bouquet of flowers and faces the camera. The woman is \<img\>\<|image_1|\>\</img\>."
  Tips:
  - For image editing and controlnet tasks, we recommend setting the height and width of the output image to be the same as the input image. For example, if you want to edit a 512x512 image, you should set the height and width of the output image to 512x512. You can also set `use_input_image_size_as_output` to automatically match the output size to the input image.
  - To reduce memory use or inference time, you can set `offload_model=True` or refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources) to select an appropriate setting.
  - If inference time is too long when inputting multiple images, please try to reduce the `max_input_image_size`. For more details, please refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources).
- - Oversaturated: If the image appears oversaturated, please reduce the `guidance_scale`.
- - Low quality: More detailed prompts will lead to better results.
- - Anime style: If the generated images are in an anime style, you can try adding `photo` to the prompt.
- - Editing a generated image: If you generate an image with OmniGen and then want to edit it, you cannot use the same seed to edit it. For example, use seed=0 to generate the image and seed=1 to edit it.
- - For image editing tasks, we recommend placing the image before the editing instruction. For example, use `<img><|image_1|></img> remove suit`, rather than `remove suit <img><|image_1|></img>`.
-

  **HF Spaces often encounter errors due to quota limitations, so we recommend running it locally.**
  """
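The placeholder convention described above maps each `<img><|image_i|></img>` tag to the i-th entry of `input_images`. As a minimal sketch (the image path is a placeholder, and the parameter values mirror the defaults used elsewhere in this file), a direct pipeline call looks like:

```python
from OmniGen import OmniGenPipeline

pipe = OmniGenPipeline.from_pretrained("Shitao/OmniGen-v1")

# <img><|image_1|></img> in the prompt refers to the first entry of input_images.
images = pipe(
    prompt="A woman holds a bouquet of flowers and faces the camera. The woman is <img><|image_1|></img>.",
    input_images=["./imgs/woman.png"],  # placeholder path
    height=1024,
    width=1024,
    guidance_scale=2.5,
    img_guidance_scale=1.6,
    seed=42,
)
images[0].save("output.png")
```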
 
- article = """
- ---
- **Citation**
- <br>
- If you find this repository useful, please consider giving a star ⭐ and a citation
- ```
- @article{xiao2024omnigen,
-   title={Omnigen: Unified image generation},
-   author={Xiao, Shitao and Wang, Yueze and Zhou, Junjie and Yuan, Huaying and Xing, Xingrun and Yan, Ruiran and Wang, Shuting and Huang, Tiejun and Liu, Zheng},
-   journal={arXiv preprint arXiv:2409.11340},
-   year={2024}
- }
- ```
- **Contact**
- <br>
- If you have any questions, please feel free to open an issue or reach out to us directly via email.
  """
  # Gradio
  with gr.Blocks() as demo:
-     gr.Markdown("# OmniGen: Unified Image Generation [paper](https://arxiv.org/abs/2409.11340) [code](https://github.com/VectorSpaceLab/OmniGen)")
      gr.Markdown(description)
      with gr.Row():
-         with gr.Column():
-             # text prompt
-             prompt_input = gr.Textbox(
-                 label="Enter your prompt, use <img><|image_i|></img> to represent i-th input image", placeholder="Type your prompt here..."
-             )
-
-             with gr.Row(equal_height=True):
-                 # input images
-                 image_input_1 = gr.Image(label="<img><|image_1|></img>", type="filepath")
-                 image_input_2 = gr.Image(label="<img><|image_2|></img>", type="filepath")
-                 image_input_3 = gr.Image(label="<img><|image_3|></img>", type="filepath")

-             # slider
-             height_input = gr.Slider(
-                 label="Height", minimum=128, maximum=2048, value=1024, step=16
-             )
-             width_input = gr.Slider(
-                 label="Width", minimum=128, maximum=2048, value=1024, step=16
-             )

-             guidance_scale_input = gr.Slider(
-                 label="Guidance Scale", minimum=1.0, maximum=5.0, value=2.5, step=0.1
-             )

-             img_guidance_scale_input = gr.Slider(
-                 label="img_guidance_scale", minimum=1.0, maximum=2.0, value=1.6, step=0.1
-             )

-             num_inference_steps = gr.Slider(
-                 label="Inference Steps", minimum=1, maximum=100, value=50, step=1
-             )

-             seed_input = gr.Slider(
-                 label="Seed", minimum=0, maximum=2147483647, value=42, step=1
-             )
-             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

-             max_input_image_size = gr.Slider(
-                 label="max_input_image_size", minimum=128, maximum=2048, value=1024, step=16
-             )

-             separate_cfg_infer = gr.Checkbox(
-                 label="separate_cfg_infer", info="Whether to use a separate inference process for different guidance. This will reduce the memory cost.", value=True,
-             )
-             offload_model = gr.Checkbox(
-                 label="offload_model", info="Offload the model to CPU, which significantly reduces memory cost but slows down generation. You can disable separate_cfg_infer and set offload_model=True. If both separate_cfg_infer and offload_model are True, memory use is reduced further, but generation is slowest.", value=False,
-             )
-             use_input_image_size_as_output = gr.Checkbox(
-                 label="use_input_image_size_as_output", info="Automatically set the output image size to match the input image size. For editing and controlnet tasks, this ensures the output image has the same size as the input, leading to better results.", value=False,
-             )

-             # generate
-             generate_button = gr.Button("Generate Image")

-
-         with gr.Column():
-             # output image
-             output_image = gr.Image(label="Output Image")

      # click
      generate_button.click(
          generate_image,
-         inputs=[
-             prompt_input,
-             image_input_1,
-             image_input_2,
-             image_input_3,
-             height_input,
-             width_input,
-             guidance_scale_input,
-             img_guidance_scale_input,
-             num_inference_steps,
-             seed_input,
-             separate_cfg_infer,
-             offload_model,
-             use_input_image_size_as_output,
-             max_input_image_size,
-             randomize_seed,
-         ],
          outputs=output_image,
      )

      gr.Examples(
          examples=get_example(),
          fn=run_for_examples,
-         inputs=[
-             prompt_input,
-             image_input_1,
-             image_input_2,
-             image_input_3,
-             height_input,
-             width_input,
-             guidance_scale_input,
-             img_guidance_scale_input,
-             seed_input,
-             max_input_image_size,
-             randomize_seed,
-             use_input_image_size_as_output,
-         ],
          outputs=output_image,
      )

-     gr.Markdown(article)

  # launch
- demo.launch()
-
 
  import gradio as gr
  import os
  import random
+ from PIL import Image
  import spaces
+ import torch
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
  from OmniGen import OmniGenPipeline

+ pipe = OmniGenPipeline.from_pretrained("Shitao/OmniGen-v1")
+ model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+ model = MllamaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ @spaces.GPU()
+ def predict_clothing(images):
+     messages = [{"role": "user", "content":
+         [
+             {"type": "image"},
+             {"type": "text", "text": "Define this clothing in 1-3 words. Your response should be only the definition."}
+         ]}
+     ]
+     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+     output_texts = []
+     for image in images:
+         # The Gradio inputs are file paths, so load each one as a PIL image before preprocessing.
+         inputs = processor(Image.open(image), input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
+         with torch.no_grad():
+             output = model.generate(**inputs, max_new_tokens=30)
+         output_texts.append(str(processor.decode(output[0])))
+
+     return output_texts
+
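Note that `processor.decode(output[0])` above returns the whole sequence, chat template and prompt included, so the appended string is more than the bare garment name. A minimal sketch of a stricter decode step (a suggested refinement, not part of this commit; the helper name is hypothetical):

```python
# Hypothetical helper: decode only the tokens generated after the prompt so the
# result is just the 1-3 word garment label, without chat-template text.
def decode_new_tokens(processor, inputs, output):
    prompt_len = inputs["input_ids"].shape[-1]   # number of prompt tokens fed to generate()
    generated_ids = output[0][prompt_len:]       # tokens produced by the model
    return processor.decode(generated_ids, skip_special_tokens=True).strip()
```

Inside the loop, `output_texts.append(decode_new_tokens(processor, inputs, output))` would then collect clean labels for the prompt builder below.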
 
  @spaces.GPU(duration=180)
+ def generate_image(img1, img2, img3, height, width, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
+                    use_input_image_size_as_output, max_input_image_size, randomize_seed, guidance_scale=3.5):
+
      input_images = [img1, img2, img3]
      # Delete None
      input_images = [img for img in input_images if img is not None]
      if len(input_images) == 0:
          input_images = None
+
+     # Caption the clothing images and build the OmniGen prompt from the predicted garment names.
+     wears = predict_clothing(input_images[1:]) if input_images else []
+     if len(wears) == 1:
+         dress = wears[0]
+         text = f"""A male wearing a {dress}. The male is in <img><|image_1|></img>. The {dress} is in <img><|image_2|></img>."""
+     elif len(wears) == 2:
+         topwear, bottomwear = wears[0], wears[1]
+         text = f"""A male wearing a {topwear} and a {bottomwear}. The male is in <img><|image_1|></img>.
+         The {topwear} is in <img><|image_2|></img>. The {bottomwear} is in <img><|image_3|></img>."""
+     else:
+         # A person image plus one or two clothing images are required to build the prompt.
+         raise gr.Error("Please upload a person image and one or two clothing images.")
+

      if randomize_seed:
          seed = random.randint(0, 10000000)

+     output = pipe(prompt=text, input_images=input_images, height=height, width=width, guidance_scale=guidance_scale,
+                   img_guidance_scale=img_guidance_scale, num_inference_steps=inference_steps, separate_cfg_infer=separate_cfg_infer,
+                   use_kv_cache=True, offload_kv_cache=True, offload_model=offload_model,
+                   use_input_image_size_as_output=use_input_image_size_as_output, seed=seed, max_input_image_size=max_input_image_size)
      img = output[0]
      return img
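For reference, a minimal sketch of calling the new `generate_image` directly (file paths are placeholders; the numeric values mirror the slider defaults in the UI below):

```python
# Hypothetical direct call: person image plus top-wear and bottom-wear images.
result = generate_image(
    "person.png", "tshirt.png", "jeans.png",  # img1 (Person), img2 (Top-wear), img3 (Bottom-wear)
    1024, 1024,   # height, width
    1.6,          # img_guidance_scale
    50,           # inference_steps
    42,           # seed
    True,         # separate_cfg_infer
    False,        # offload_model
    False,        # use_input_image_size_as_output
    1024,         # max_input_image_size
    False,        # randomize_seed
    guidance_scale=2.5,
)
result.save("try_on.png")
```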
 
 
 
  def get_example():
      case = [
          [
 
  return case

  def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, seed, max_input_image_size, randomize_seed, use_input_image_size_as_output):
+     # Default values fixed inside the function; `text` is kept for compatibility with the example cases but is no longer forwarded.
      inference_steps = 50
      separate_cfg_infer = True
      offload_model = False

+     return generate_image(img1, img2, img3, height, width, img_guidance_scale, inference_steps, seed,
+                           separate_cfg_infer, offload_model, use_input_image_size_as_output, max_input_image_size, randomize_seed,
+                           guidance_scale=guidance_scale)
 
  description = """
+ This is a Virtual Try-On platform.
+ Usage:
+ - First, upload your own image as the first input, tagged 'Person'.
+ - Then upload your 'Top-wear' and 'Bottom-wear' images.
+ - If it is a single dress, or you don't have separate top-wear and bottom-wear images, upload that single image under 'Top-wear'.
+
  Tips:
  - For image editing and controlnet tasks, we recommend setting the height and width of the output image to be the same as the input image. For example, if you want to edit a 512x512 image, you should set the height and width of the output image to 512x512. You can also set `use_input_image_size_as_output` to automatically match the output size to the input image.
  - To reduce memory use or inference time, you can set `offload_model=True` or refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources) to select an appropriate setting.
  - If inference time is too long when inputting multiple images, please try to reduce the `max_input_image_size`. For more details, please refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources).

  **HF Spaces often encounter errors due to quota limitations, so we recommend running it locally.**
  """
+ Credits = """**Credits**
+ Made using [OmniGen](https://huggingface.co/Shitao/OmniGen-v1): Unified Image Generation [paper](https://arxiv.org/abs/2409.11340) [code](https://github.com/VectorSpaceLab/OmniGen)
  """
  # Gradio
  with gr.Blocks() as demo:
+     gr.Markdown("Virtual Try-On")
      gr.Markdown(description)
      with gr.Row():
+         with gr.Row(equal_height=True):
+             # input images
+             image_input_1 = gr.Image(label="Person", type="filepath")
+             image_input_2 = gr.Image(label="Top-wear", type="filepath")
+             image_input_3 = gr.Image(label="Bottom-wear", type="filepath")

+         # slider
+         height_input = gr.Slider(
+             label="Height", minimum=128, maximum=2048, value=1024, step=16
+         )
+         width_input = gr.Slider(
+             label="Width", minimum=128, maximum=2048, value=1024, step=16
+         )

+         guidance_scale_input = gr.Slider(
+             label="Guidance Scale", minimum=1.0, maximum=5.0, value=2.5, step=0.1
+         )

+         img_guidance_scale_input = gr.Slider(
+             label="img_guidance_scale", minimum=1.0, maximum=2.0, value=1.6, step=0.1
+         )

+         num_inference_steps = gr.Slider(
+             label="Inference Steps", minimum=1, maximum=100, value=50, step=1
+         )

+         seed_input = gr.Slider(
+             label="Seed", minimum=0, maximum=2147483647, value=42, step=1
+         )
+         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

+         max_input_image_size = gr.Slider(
+             label="max_input_image_size", minimum=128, maximum=2048, value=1024, step=16
+         )

+         separate_cfg_infer = gr.Checkbox(
+             label="separate_cfg_infer", info="Whether to use a separate inference process for different guidance. This will reduce the memory cost.", value=True,
+         )
+         offload_model = gr.Checkbox(
+             label="offload_model", info="Offload the model to CPU, which significantly reduces memory cost but slows down generation. You can disable separate_cfg_infer and set offload_model=True. If both separate_cfg_infer and offload_model are True, memory use is reduced further, but generation is slowest.", value=False,
+         )
+         use_input_image_size_as_output = gr.Checkbox(
+             label="use_input_image_size_as_output", info="Automatically set the output image size to match the input image size. For editing and controlnet tasks, this ensures the output image has the same size as the input, leading to better results.", value=False,
+         )

+         # generate
+         generate_button = gr.Button("Generate Image")

+     with gr.Row():
+         # output image
+         output_image = gr.Image(label="Output Image")

      # click
      generate_button.click(
          generate_image,
+         inputs=[image_input_1, image_input_2, image_input_3, height_input, width_input, img_guidance_scale_input, num_inference_steps,
+                 seed_input, separate_cfg_infer, offload_model, use_input_image_size_as_output, max_input_image_size, randomize_seed,
+                 guidance_scale_input],
          outputs=output_image,
      )

      gr.Examples(
          examples=get_example(),
          fn=run_for_examples,
+         inputs=[image_input_1, image_input_2, image_input_3, height_input, width_input, img_guidance_scale_input, seed_input,
+                 max_input_image_size, randomize_seed, use_input_image_size_as_output, guidance_scale_input],
          outputs=output_image,
      )

+     gr.Markdown(Credits)

  # launch
+ demo.launch()
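Since the description recommends running the demo locally when Spaces quota errors occur, a minimal local entry point might look like the sketch below (the `queue()` call and server arguments are optional additions, not part of this commit):

```python
# Hypothetical local launch: queue concurrent requests and bind to all interfaces.
if __name__ == "__main__":
    demo.queue(max_size=4).launch(server_name="0.0.0.0", server_port=7860)
```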