linoyts (HF Staff) committed · verified
Commit 0471b1e · 1 Parent(s): dfc1725

revert my changes

Files changed (1):
  1. app.py (+35 -194)
app.py CHANGED
@@ -3,137 +3,14 @@ import numpy as np
 import random
 import torch
 import spaces
-import os
-import json
-import torch
+
 from PIL import Image
-from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
-from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
-from huggingface_hub import InferenceClient
+import torch
 import math
 
 from optimization import optimize_pipeline_
-
-# --- Prompt Enhancement using Hugging Face InferenceClient ---
-def polish_prompt_hf(original_prompt, system_prompt):
-    """
-    Rewrites the prompt using a Hugging Face InferenceClient.
-    """
-    # Ensure HF_TOKEN is set
-    api_key = os.environ.get("HF_TOKEN")
-    if not api_key:
-        print("Warning: HF_TOKEN not set. Falling back to original prompt.")
-        return original_prompt
-
-    try:
-        # Initialize the client
-        client = InferenceClient(
-            provider="cerebras",
-            api_key=api_key,
-        )
-
-        # Format the messages for the chat completions API
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": original_prompt}
-        ]
-
-        # Call the API
-        completion = client.chat.completions.create(
-            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
-            messages=messages,
-        )
-
-        # Parse the response
-        result = completion.choices[0].message.content
-
-        # Try to extract JSON if present
-        if '{"Rewritten"' in result:
-            try:
-                # Clean up the response
-                result = result.replace('```json', '').replace('```', '')
-                result_json = json.loads(result)
-                polished_prompt = result_json.get('Rewritten', result)
-            except:
-                polished_prompt = result
-        else:
-            polished_prompt = result
-
-        polished_prompt = polished_prompt.strip().replace("\n", " ")
-        return polished_prompt
-
-    except Exception as e:
-        print(f"Error during API call to Hugging Face: {e}")
-        # Fallback to original prompt if enhancement fails
-        return original_prompt
-
-
-def polish_prompt(prompt, img):
-    """
-    Main function to polish prompts for image editing using HF inference.
-    """
-    SYSTEM_PROMPT = '''
-# Edit Instruction Rewriter
-You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
-
-Please strictly follow the rewriting rules below:
-
-## 1. General Principles
-- Keep the rewritten prompt **concise**. Avoid overly long sentences and reduce unnecessary descriptive language.
-- If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
-- Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
-- All added objects or modifications must align with the logic and style of the edited input image's overall scene.
-
-## 2. Task Type Handling Rules
-### 1. Add, Delete, Replace Tasks
-- If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
-- If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
-    > Original: "Add an animal"
-    > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
-- Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
-- For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
-
-### 2. Text Editing Tasks
-- All text content must be enclosed in English double quotes " ". Do not translate or alter the original language of the text, and do not change the capitalization.
-- **For text replacement tasks, always use the fixed template:**
-    - Replace "xx" to "yy".
-    - Replace the xx bounding box to "yy".
-- If the user does not specify text content, infer and add concise text based on the instruction and the input image's context. For example:
-    > Original: "Add a line of text" (poster)
-    > Rewritten: "Add text "LIMITED EDITION" at the top center with slight shadow"
-- Specify text position, color, and layout in a concise way.
-
-### 3. Human Editing Tasks
-- Maintain the person's core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
-- If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
-- **For expression changes, they must be natural and subtle, never exaggerated.**
-- If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
-- For background change tasks, emphasize maintaining subject consistency at first.
-- Example:
-    > Original: "Change the person's hat"
-    > Rewritten: "Replace the man's hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"
-
-### 4. Style Transformation or Enhancement Tasks
-- If a style is specified, describe it concisely with key visual traits. For example:
-    > Original: "Disco style"
-    > Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
-- If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
-- **For coloring tasks, including restoring old photos, always use the fixed template:** "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
-- If there are other changes, place the style description at the end.
-
-## 3. Rationality and Logic Checks
-- Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
-- Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).
-
-# Output Format
-Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
-'''
-
-    # Note: We're not actually using the image in the HF version,
-    # but keeping the interface consistent
-    full_prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
-
-    return polish_prompt_hf(full_prompt, SYSTEM_PROMPT)
+from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
+from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
 
 
 # --- Model Loading ---
@@ -148,7 +25,7 @@ optimize_pipeline_(pipe, image=Image.new("RGB", (1024, 1024)), prompt="prompt")
 # --- UI Constants and Helpers ---
 MAX_SEED = np.iinfo(np.int32).max
 
-# --- Main Inference Function ---
+# --- Main Inference Function (with hardcoded negative prompt) ---
 @spaces.GPU(duration=120)
 def infer(
     image,
@@ -156,14 +33,13 @@ def infer(
     seed=42,
     randomize_seed=False,
     true_guidance_scale=4.0,
-    num_inference_steps=30,
-    rewrite_prompt=True,
+    num_inference_steps=50,
     progress=gr.Progress(track_tqdm=True),
 ):
     """
-    Generates an edited image using the Qwen-Image-Edit pipeline with Lightning acceleration.
+    Generates an image using the local Qwen-Image diffusers pipeline.
     """
-    # Hardcode the negative prompt as in the original
+    # Hardcode the negative prompt as requested
    negative_prompt = " "
 
     if randomize_seed:
@@ -172,83 +48,54 @@ def infer(
     # Set up the generator for reproducibility
     generator = torch.Generator(device=device).manual_seed(seed)
 
-    print(f"Original prompt: '{prompt}'")
+    print(f"Calling pipeline with prompt: '{prompt}'")
     print(f"Negative Prompt: '{negative_prompt}'")
-    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
-
-    if rewrite_prompt:
-        prompt = polish_prompt(prompt, image)
-        print(f"Rewritten Prompt: {prompt}")
-
-    # Generate the edited image - always generate just 1 image
+    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
 
-    images = pipe(
+    # Generate the image
+    image = pipe(
         image,
         prompt=prompt,
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         generator=generator,
         true_cfg_scale=true_guidance_scale,
-        num_images_per_prompt=1  # Always generate only 1 image
-    ).images
-
-    # Return the first (and only) image
-    return images[0], seed
-
+    ).images[0]
 
+    return image, seed
 
 # --- Examples and UI Layout ---
+examples = []
 
 css = """
 #col-container {
     margin: 0 auto;
     max-width: 1024px;
 }
-#logo-title {
-    text-align: center;
-}
-#logo-title img {
-    width: 400px;
-}
 #edit_text{margin-top: -62px !important}
 """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML("""
-        <div id="logo-title">
-            <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo" width="400" style="display: block; margin: 0 auto;">
-        </div>
-        """)
-        gr.Markdown("""
-        [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
-        Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.
-        """)
-
+        gr.HTML('<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_logo.png" alt="Qwen-Image Logo" width="400" style="display: block; margin: 0 auto;">')
+        gr.HTML('<h1 style="text-align: center;margin-left: 80px;color: #5b47d1;font-style: italic;">Edit</h1>', elem_id="edit_text")
+        gr.Markdown("[Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series. Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.")
         with gr.Row():
             with gr.Column():
-                input_image = gr.Image(
-                    label="Input Image",
-                    show_label=True,
-                    type="pil"
+                input_image = gr.Image(label="Input Image", show_label=False, type="pil")
+                prompt = gr.Text(
+                    label="Prompt",
+                    show_label=False,
+                    placeholder="describe the edit instruction",
+                    container=False,
                 )
-                # Changed from Gallery to Image
-                result = gr.Image(
-                    label="Result",
-                    show_label=True,
-                    type="pil"
-                )
-
-                with gr.Row():
-                    prompt = gr.Text(
-                        label="Edit Instruction",
-                        show_label=False,
-                        placeholder="Describe the edit instruction (e.g., 'Replace the background with a sunset', 'Add a red hat', 'Remove the person')",
-                        container=False,
-                    )
-                    run_button = gr.Button("Edit!", variant="primary")
+                run_button = gr.Button("Edit!", variant="primary")
+
+                result = gr.Image(label="Result", show_label=False, type="pil")
 
         with gr.Accordion("Advanced Settings", open=False):
+            # Negative prompt UI element is removed here
+
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -260,29 +107,24 @@ with gr.Blocks(css=css) as demo:
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
             with gr.Row():
+
                 true_guidance_scale = gr.Slider(
                     label="True guidance scale",
                     minimum=1.0,
                     maximum=10.0,
                     step=0.1,
-                    value=4.0
+                    value=1.0
                 )
 
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
-                    minimum=4,
+                    minimum=1,
                     maximum=50,
                     step=1,
-                    value=30
+                    value=50,
                 )
-
-            # Removed num_images_per_prompt slider entirely
-            rewrite_prompt = gr.Checkbox(
-                label="Enhance prompt (using HF Inference)",
-                value=True
-            )
 
-    #gr.Examples(examples=[["qwen-image.png", "make her eyes brown"]], inputs=[input_image, prompt], outputs=[result, seed], fn=infer, cache_examples="lazy")
+    # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
 
     gr.on(
         triggers=[run_button.click, prompt.submit],
@@ -290,12 +132,11 @@ with gr.Blocks(css=css) as demo:
         inputs=[
            input_image,
            prompt,
+           # negative_prompt is no longer an input from the UI
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
-           rewrite_prompt,
-           # Removed num_images_per_prompt from inputs
        ],
        outputs=[result, seed],
    )
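
For orientation, after this revert infer() boils down to a single pipeline call per request, with a hardcoded single-space negative prompt and true_cfg_scale taken from the slider. Below is a minimal standalone sketch of that call path. The model-loading hunk (old lines 140-147) is elided from this diff, so the from_pretrained call, the "Qwen/Qwen-Image-Edit" checkpoint id, and the bfloat16/CUDA placement are assumptions; the Space itself imports a local qwenimage copy of the pipeline rather than the upstream diffusers class, and the input path and prompt here are hypothetical.

import torch
from PIL import Image
from diffusers import QwenImageEditPipeline  # assumed stand-in for qwenimage.pipeline_qwen_image_edit

# Assumed loading step (not shown in the diff): public checkpoint, bf16 on GPU.
pipe = QwenImageEditPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit", torch_dtype=torch.bfloat16
).to("cuda")

# Mirrors the reverted infer(): seeded generator, hardcoded " " negative prompt,
# and true_cfg_scale rather than the guidance_scale used by most diffusers pipelines.
generator = torch.Generator(device="cuda").manual_seed(42)
edited = pipe(
    Image.open("input.png").convert("RGB"),  # hypothetical input image
    prompt="Replace the background with a sunset",  # hypothetical edit instruction
    negative_prompt=" ",
    num_inference_steps=50,  # slider default after the revert
    generator=generator,
    true_cfg_scale=1.0,  # UI default after the revert
).images[0]
edited.save("edited.png")

Note that with true_cfg_scale left at the new UI default of 1.0, true classifier-free guidance should be effectively disabled and the hardcoded " " negative prompt should have no influence; raising the slider above 1.0 engages it.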