linoyts HF Staff committed on
Commit
dfc1725
·
verified ·
1 Parent(s): 7d59b84

Update app.py (#1)

Browse files

- Update app.py (e6c7562114bf2f90bde11cf06570e4969dc7a718)
- Update app.py (4553d9dd647486cfa84325be729010afde4d73ba)
- Update app.py (346e110a66bd7153e3696f9bb08ce78c7ab67305)
- Update app.py (9e85d08389c3f4ff553d1ddcfbce9290ddc1b0d3)

Files changed (1) hide show
  1. app.py +194 -35
app.py CHANGED
@@ -3,14 +3,137 @@ import numpy as np
3
  import random
4
  import torch
5
  import spaces
6
-
 
 
7
  from PIL import Image
8
- import torch
 
 
9
  import math
10
 
11
  from optimization import optimize_pipeline_
12
- from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
13
- from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  # --- Model Loading ---
@@ -25,7 +148,7 @@ optimize_pipeline_(pipe, image=Image.new("RGB", (1024, 1024)), prompt="prompt")
25
  # --- UI Constants and Helpers ---
26
  MAX_SEED = np.iinfo(np.int32).max
27
 
28
- # --- Main Inference Function (with hardcoded negative prompt) ---
29
  @spaces.GPU(duration=120)
30
  def infer(
31
  image,
@@ -33,13 +156,14 @@ def infer(
33
  seed=42,
34
  randomize_seed=False,
35
  true_guidance_scale=4.0,
36
- num_inference_steps=50,
 
37
  progress=gr.Progress(track_tqdm=True),
38
  ):
39
  """
40
- Generates an image using the local Qwen-Image diffusers pipeline.
41
  """
42
- # Hardcode the negative prompt as requested
43
  negative_prompt = " "
44
 
45
  if randomize_seed:
@@ -48,54 +172,83 @@ def infer(
48
  # Set up the generator for reproducibility
49
  generator = torch.Generator(device=device).manual_seed(seed)
50
 
51
- print(f"Calling pipeline with prompt: '{prompt}'")
52
  print(f"Negative Prompt: '{negative_prompt}'")
53
- print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {guidance_scale}")
 
 
 
 
 
 
54
 
55
- # Generate the image
56
- image = pipe(
57
  image,
58
  prompt=prompt,
59
  negative_prompt=negative_prompt,
60
  num_inference_steps=num_inference_steps,
61
  generator=generator,
62
  true_cfg_scale=true_guidance_scale,
63
- ).images[0]
 
 
 
 
 
64
 
65
- return image, seed
66
 
67
  # --- Examples and UI Layout ---
68
- examples = []
69
 
70
  css = """
71
  #col-container {
72
  margin: 0 auto;
73
  max-width: 1024px;
74
  }
 
 
 
 
 
 
75
  #edit_text{margin-top: -62px !important}
76
  """
77
 
78
  with gr.Blocks(css=css) as demo:
79
  with gr.Column(elem_id="col-container"):
80
- gr.HTML('<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_logo.png" alt="Qwen-Image Logo" width="400" style="display: block; margin: 0 auto;">')
81
- gr.HTML('<h1 style="text-align: center;margin-left: 80px;color: #5b47d1;font-style: italic;">Edit</h1>', elem_id="edit_text")
82
- gr.Markdown("[Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series. Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.")
 
 
 
 
 
 
 
83
  with gr.Row():
84
  with gr.Column():
85
- input_image = gr.Image(label="Input Image", show_label=False, type="pil")
86
- prompt = gr.Text(
87
- label="Prompt",
88
- show_label=False,
89
- placeholder="describe the edit instruction",
90
- container=False,
91
  )
92
- run_button = gr.Button("Edit!", variant="primary")
93
-
94
- result = gr.Image(label="Result", show_label=False, type="pil")
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  with gr.Accordion("Advanced Settings", open=False):
97
- # Negative prompt UI element is removed here
98
-
99
  seed = gr.Slider(
100
  label="Seed",
101
  minimum=0,
@@ -107,24 +260,29 @@ with gr.Blocks(css=css) as demo:
107
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
108
 
109
  with gr.Row():
110
-
111
  true_guidance_scale = gr.Slider(
112
  label="True guidance scale",
113
  minimum=1.0,
114
  maximum=10.0,
115
  step=0.1,
116
- value=1.0
117
  )
118
 
119
  num_inference_steps = gr.Slider(
120
  label="Number of inference steps",
121
- minimum=1,
122
  maximum=50,
123
  step=1,
124
- value=50,
125
  )
 
 
 
 
 
 
126
 
127
- # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
128
 
129
  gr.on(
130
  triggers=[run_button.click, prompt.submit],
@@ -132,11 +290,12 @@ with gr.Blocks(css=css) as demo:
132
  inputs=[
133
  input_image,
134
  prompt,
135
- # negative_prompt is no longer an input from the UI
136
  seed,
137
  randomize_seed,
138
  true_guidance_scale,
139
  num_inference_steps,
 
 
140
  ],
141
  outputs=[result, seed],
142
  )
 
3
  import random
4
  import torch
5
  import spaces
6
+ import os
7
+ import json
8
+ import torch
9
  from PIL import Image
10
+ from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
11
+ from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
+ from huggingface_hub import InferenceClient
13
  import math
14
 
15
  from optimization import optimize_pipeline_
16
+
17
+ # --- Prompt Enhancement using Hugging Face InferenceClient ---
18
def _extract_rewritten(text):
    """Pull the rewritten instruction out of a raw model reply.

    The model is asked for plain text, but it sometimes answers with a JSON
    object of the form ``{"Rewritten": "..."}``, optionally wrapped in a
    Markdown code fence. When such an object is present and parses cleanly,
    its ``"Rewritten"`` string is used; otherwise the (fence-stripped) reply
    text itself is used.

    Args:
        text: Raw reply content returned by the chat completion.

    Returns:
        The instruction as a single line with surrounding whitespace removed.
    """
    if '{"Rewritten"' in text:
        # Drop Markdown code fences so the remainder can be parsed as JSON.
        text = text.replace('```json', '').replace('```', '')
        try:
            payload = json.loads(text)
        except ValueError:
            # json.JSONDecodeError is a ValueError: the reply only looked
            # like JSON, so keep the fence-stripped text unchanged.
            payload = None
        if isinstance(payload, dict):
            rewritten = payload.get('Rewritten')
            # Only trust the field when it really is text.
            if isinstance(rewritten, str):
                text = rewritten

    return text.strip().replace("\n", " ")


def polish_prompt_hf(original_prompt, system_prompt):
    """Rewrite a prompt with a chat model via the Hugging Face InferenceClient.

    This is a best-effort enhancement: whenever the rewrite cannot be
    obtained, the original prompt is returned unchanged so the caller can
    always proceed.

    Args:
        original_prompt: Text sent as the user message; also the fallback
            return value.
        system_prompt: Text sent as the system message.

    Returns:
        The rewritten prompt as a single line, or ``original_prompt`` when
        ``HF_TOKEN`` is not set, the API call fails, or the model returns
        no usable text.
    """
    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        print("Warning: HF_TOKEN not set. Falling back to original prompt.")
        return original_prompt

    try:
        client = InferenceClient(
            provider="cerebras",
            api_key=api_key,
        )
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": original_prompt},
            ],
        )
        reply = completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: any client/network/response-shape failure must
        # degrade to the original prompt instead of failing the request.
        print(f"Error during API call to Hugging Face: {e}")
        return original_prompt

    # The message content can be missing; treat that like a failed rewrite.
    if not isinstance(reply, str):
        return original_prompt

    polished_prompt = _extract_rewritten(reply)
    # Never hand back an empty instruction.
    return polished_prompt or original_prompt
69
+
70
+
71
def polish_prompt(prompt, img):
    """Polish an image-edit instruction using HF inference.

    The rewriting rules are sent as the system message and the user's
    instruction alone is sent as the user message. Passing the bare
    instruction matters for the fallback path: ``polish_prompt_hf`` returns
    its first argument unchanged when the rewrite is unavailable, so that
    argument must be something that is safe to feed to the image pipeline.

    Args:
        prompt: The edit instruction typed by the user.
        img: The image being edited. Currently unused (the HF text model
            does not receive it); kept so the interface stays consistent.

    Returns:
        The rewritten instruction, or ``prompt`` unchanged when it is empty
        or the rewrite could not be obtained.
    """
    SYSTEM_PROMPT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.

Please strictly follow the rewriting rules below:

## 1. General Principles
- Keep the rewritten prompt **concise**. Avoid overly long sentences and reduce unnecessary descriptive language.
- If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
- Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
- All added objects or modifications must align with the logic and style of the edited input image's overall scene.

## 2. Task Type Handling Rules
### 1. Add, Delete, Replace Tasks
- If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
- If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
> Original: "Add an animal"
> Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
- Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
- For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.

### 2. Text Editing Tasks
- All text content must be enclosed in English double quotes " ". Do not translate or alter the original language of the text, and do not change the capitalization.
- **For text replacement tasks, always use the fixed template:**
- Replace "xx" to "yy".
- Replace the xx bounding box to "yy".
- If the user does not specify text content, infer and add concise text based on the instruction and the input image's context. For example:
> Original: "Add a line of text" (poster)
> Rewritten: "Add text "LIMITED EDITION" at the top center with slight shadow"
- Specify text position, color, and layout in a concise way.

### 3. Human Editing Tasks
- Maintain the person's core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
- If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
- **For expression changes, they must be natural and subtle, never exaggerated.**
- If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
- For background change tasks, emphasize maintaining subject consistency at first.
- Example:
> Original: "Change the person's hat"
> Rewritten: "Replace the man's hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"

### 4. Style Transformation or Enhancement Tasks
- If a style is specified, describe it concisely with key visual traits. For example:
> Original: "Disco style"
> Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
- If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
- **For coloring tasks, including restoring old photos, always use the fixed template:** "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
- If there are other changes, place the style description at the end.

## 3. Rationality and Logic Checks
- Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
- Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).

# Output Format
Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
'''

    # Nothing to rewrite: skip the API round-trip and hand the input back.
    if not prompt or not prompt.strip():
        return prompt

    # Send only the user's instruction as the user message. The rules are
    # already delivered through the system message, and on any fallback the
    # helper echoes this argument back, so it must not contain the rules.
    return polish_prompt_hf(prompt, SYSTEM_PROMPT)
137
 
138
 
139
  # --- Model Loading ---
 
148
  # --- UI Constants and Helpers ---
149
  MAX_SEED = np.iinfo(np.int32).max
150
 
151
+ # --- Main Inference Function ---
152
  @spaces.GPU(duration=120)
153
  def infer(
154
  image,
 
156
  seed=42,
157
  randomize_seed=False,
158
  true_guidance_scale=4.0,
159
+ num_inference_steps=30,
160
+ rewrite_prompt=True,
161
  progress=gr.Progress(track_tqdm=True),
162
  ):
163
  """
164
+ Generates an edited image using the Qwen-Image-Edit pipeline with Lightning acceleration.
165
  """
166
+ # Hardcode the negative prompt as in the original
167
  negative_prompt = " "
168
 
169
  if randomize_seed:
 
172
  # Set up the generator for reproducibility
173
  generator = torch.Generator(device=device).manual_seed(seed)
174
 
175
+ print(f"Original prompt: '{prompt}'")
176
  print(f"Negative Prompt: '{negative_prompt}'")
177
+ print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
178
+
179
+ if rewrite_prompt:
180
+ prompt = polish_prompt(prompt, image)
181
+ print(f"Rewritten Prompt: {prompt}")
182
+
183
+ # Generate the edited image - always generate just 1 image
184
 
185
+ images = pipe(
 
186
  image,
187
  prompt=prompt,
188
  negative_prompt=negative_prompt,
189
  num_inference_steps=num_inference_steps,
190
  generator=generator,
191
  true_cfg_scale=true_guidance_scale,
192
+ num_images_per_prompt=1 # Always generate only 1 image
193
+ ).images
194
+
195
+ # Return the first (and only) image
196
+ return images[0], seed
197
+
198
 
 
199
 
200
  # --- Examples and UI Layout ---
 
201
 
202
  css = """
203
  #col-container {
204
  margin: 0 auto;
205
  max-width: 1024px;
206
  }
207
+ #logo-title {
208
+ text-align: center;
209
+ }
210
+ #logo-title img {
211
+ width: 400px;
212
+ }
213
  #edit_text{margin-top: -62px !important}
214
  """
215
 
216
  with gr.Blocks(css=css) as demo:
217
  with gr.Column(elem_id="col-container"):
218
+ gr.HTML("""
219
+ <div id="logo-title">
220
+ <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo" width="400" style="display: block; margin: 0 auto;">
221
+ </div>
222
+ """)
223
+ gr.Markdown("""
224
+ [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
225
+ Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.
226
+ """)
227
+
228
  with gr.Row():
229
  with gr.Column():
230
+ input_image = gr.Image(
231
+ label="Input Image",
232
+ show_label=True,
233
+ type="pil"
 
 
234
  )
235
+ # Changed from Gallery to Image
236
+ result = gr.Image(
237
+ label="Result",
238
+ show_label=True,
239
+ type="pil"
240
+ )
241
+
242
+ with gr.Row():
243
+ prompt = gr.Text(
244
+ label="Edit Instruction",
245
+ show_label=False,
246
+ placeholder="Describe the edit instruction (e.g., 'Replace the background with a sunset', 'Add a red hat', 'Remove the person')",
247
+ container=False,
248
+ )
249
+ run_button = gr.Button("Edit!", variant="primary")
250
 
251
  with gr.Accordion("Advanced Settings", open=False):
 
 
252
  seed = gr.Slider(
253
  label="Seed",
254
  minimum=0,
 
260
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
261
 
262
  with gr.Row():
 
263
  true_guidance_scale = gr.Slider(
264
  label="True guidance scale",
265
  minimum=1.0,
266
  maximum=10.0,
267
  step=0.1,
268
+ value=4.0
269
  )
270
 
271
  num_inference_steps = gr.Slider(
272
  label="Number of inference steps",
273
+ minimum=4,
274
  maximum=50,
275
  step=1,
276
+ value=30
277
  )
278
+
279
+ # Removed num_images_per_prompt slider entirely
280
+ rewrite_prompt = gr.Checkbox(
281
+ label="Enhance prompt (using HF Inference)",
282
+ value=True
283
+ )
284
 
285
+ #gr.Examples(examples=[["qwen-image.png", "make her eyes brown"]], inputs=[input_image, prompt], outputs=[result, seed], fn=infer, cache_examples="lazy")
286
 
287
  gr.on(
288
  triggers=[run_button.click, prompt.submit],
 
290
  inputs=[
291
  input_image,
292
  prompt,
 
293
  seed,
294
  randomize_seed,
295
  true_guidance_scale,
296
  num_inference_steps,
297
+ rewrite_prompt,
298
+ # Removed num_images_per_prompt from inputs
299
  ],
300
  outputs=[result, seed],
301
  )