seawolf2357 committed on
Commit
53b20f2
·
verified ·
1 Parent(s): a6147c0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +535 -0
app.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import spaces
6
+ from PIL import Image
7
+ from diffusers import QwenImageEditPipeline
8
+ from diffusers.utils import is_xformers_available
9
+ import os
10
+ import base64
11
+ import json
12
+ from huggingface_hub import InferenceClient
13
+ import logging
14
+
15
# ---------------------------------------------------------------------------
# Process-wide runtime configuration.
# ---------------------------------------------------------------------------
# Root logging is configured at DEBUG verbosity; `logger` is this module's
# own logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Provide defaults for the Gradio analytics and Hugging Face Hub telemetry
# switches. `setdefault` leaves any value already present in the environment
# untouched.
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
# ---------------------------------------------------------------------------
21
+
22
def get_caption_language(prompt):
    """Return 'zh' if *prompt* contains a CJK Unified Ideograph, else 'en'."""
    # Inclusive (first, last) code-point intervals that count as Chinese.
    cjk_blocks = (
        ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
    )
    has_chinese = any(
        low <= ch <= high
        for ch in prompt
        for low, high in cjk_blocks
    )
    return 'zh' if has_chinese else 'en'
31
+
32
def _extract_rewritten(raw_text):
    """Return the "Rewritten" value of a JSON reply, or *raw_text* unchanged."""
    # The reply may wrap the JSON object in prose or a code fence, so only the
    # outermost {...} span is handed to the JSON parser.
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end <= start:
        return raw_text
    try:
        payload = json.loads(raw_text[start:end + 1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return raw_text
    if isinstance(payload, dict):
        rewritten = payload.get("Rewritten")
        if isinstance(rewritten, str) and rewritten.strip():
            return rewritten
    return raw_text


def polish_prompt(original_prompt, system_prompt, hf_token):
    """
    Rewrite *original_prompt* with a chat model via the Hugging Face
    InferenceClient, using the caller's token.

    Args:
        original_prompt: The user's edit instruction.
        system_prompt: System message sent ahead of the user prompt.
        hf_token: User-provided Hugging Face token used as the API key.

    Returns:
        The rewritten prompt on one line. When the reply is a JSON object with
        a "Rewritten" string (the format the system prompt in this file asks
        for), that string is returned; otherwise the raw reply text is used.
        Falls back to *original_prompt* when the token is missing, the API
        call fails, or the reply is empty. Failures surface as a Gradio
        warning and are never raised to the caller.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt

    client = InferenceClient(
        provider="cerebras",
        # Pasted tokens often carry stray whitespace or a trailing newline.
        api_key=hf_token.strip(),
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=512,
        )
        reply = completion.choices[0].message.content
        # Unwrap the JSON envelope first, then flatten to a single line.
        polished_prompt = _extract_rewritten(reply).strip().replace("\n", " ")
        # An empty rewrite is useless to the image pipeline; keep the original.
        return polished_prompt or original_prompt
    except Exception as e:
        # Best-effort feature: any failure degrades to the unmodified prompt.
        print(f"Error during Hugging Face API call: {e}")
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt
61
+
62
# System message for the prompt-rewriting chat model. It asks the model to
# answer with a JSON object of the form {"Rewritten": "..."}.
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.
## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.
### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.
### Human Editing (e.g., change a person's face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.
### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:
`"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`
if applicable.
## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
"Rewritten": "..."
}
'''
93
+
94
# Model setup: bfloat16 weights, placed on CUDA when torch reports a GPU and
# on the CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16

pipe = QwenImageEditPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Load LoRA weights for acceleration, then fuse them into the pipeline.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors",
)
pipe.fuse_lora()

# Memory-efficient attention is switched on only when xformers is importable.
if not is_xformers_available():
    print("xformers not available or failed to load.")
else:
    pipe.enable_xformers_memory_efficient_attention()
108
+
109
@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Edit *image* according to *prompt* and report which prompt was used.

    Args:
        image: Input image handed to the pipeline as its first argument.
        prompt: The user's edit instruction.
        seed: Seed for the torch generator (ignored when randomize_seed is set).
        randomize_seed: Draw a fresh seed in [0, MAX_SEED] instead of using seed.
        true_guidance_scale: Passed to the pipeline as true_cfg_scale.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: When true, try to rewrite the prompt via polish_prompt;
            this requires a user-provided HF token.
        hf_token: Hugging Face token for the rewriting call; may be empty/None.
        num_images_per_prompt: Number of images requested from the pipeline.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        (edited_images, seed, prompt_info): the pipeline's images, the seed
        actually used, and an HTML snippet describing the prompt handling.

    Raises:
        gr.Error: If no input image was supplied.
    """
    from html import escape  # local import keeps this block self-contained

    if image is None:
        # Fail with a readable message instead of an error from inside the pipeline.
        raise gr.Error("Please upload an input image before generating.")

    original_prompt = prompt  # Save original prompt for display
    negative_prompt = " "
    rewritten_prompt = original_prompt
    # Prompt text ends up inside gr.HTML, so it must never be inserted raw.
    safe_original = escape(str(original_prompt))

    if not rewrite_prompt:
        # NOTE(review): the heading emoji was garbled in the source this was
        # recovered from; the memo emoji is the most plausible original.
        prompt_info = (
            '<div class="prompt-info-box default">\n'
            '<h3>📝 Original Prompt</h3>\n'
            f'<p>{safe_original}</p>\n'
            '</div>'
        )
    elif not (hf_token or "").strip():
        # Covers both an empty string and None from the token field.
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        prompt_info = (
            '<div class="prompt-info-box warning">\n'
            '<h3>⚠️ Prompt Rewriting Skipped</h3>\n'
            f'<p><strong>Original:</strong> {safe_original}</p>\n'
            '<p class="note">HF Token required for enhancement</p>\n'
            '</div>'
        )
    else:
        try:
            rewritten_prompt = polish_prompt(original_prompt, SYSTEM_PROMPT_EDIT, hf_token)
        except Exception as e:
            gr.Warning(f"Prompt rewriting failed: {e}")
            rewritten_prompt = original_prompt
            prompt_info = (
                '<div class="prompt-info-box error">\n'
                '<h3>❌ Enhancement Failed</h3>\n'
                f'<p><strong>Original:</strong> {safe_original}</p>\n'
                f'<p class="note">Error: {escape(str(e))}</p>\n'
                '</div>'
            )
        else:
            prompt_info = (
                '<div class="prompt-info-box success">\n'
                '<h3>✨ Enhanced Successfully</h3>\n'
                f'<p><strong>Original:</strong> {safe_original}</p>\n'
                f'<p><strong>Enhanced:</strong> {escape(str(rewritten_prompt))}</p>\n'
                '</div>'
            )

    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Defensive casts: manual_seed needs an int, and UI sliders may deliver
    # floats depending on the Gradio version (TODO confirm).
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=int(num_images_per_prompt),
    ).images

    return edited_images, seed, prompt_info
178
+
179
# Upper bound for seeds: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Sample edit instructions.
examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.""",
]
188
+
189
# Custom CSS for enhanced visual design.
# This stylesheet is handed to gr.Blocks(css=...) further down. The
# `.prompt-info-box` rules style the HTML status card that infer() returns
# (variants: success / warning / error / default), and `.gr-box` is attached
# explicitly to several components through elem_classes="gr-box".
custom_css = """
/* Gradient background */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #fecfef 75%, #fecfef 100%);
    min-height: 100vh;
}

/* Main container styling */
.container {
    max-width: 1400px !important;
    margin: 0 auto !important;
    padding: 2rem !important;
}

/* Card-like sections */
.gr-box {
    background: rgba(255, 255, 255, 0.95) !important;
    backdrop-filter: blur(10px) !important;
    border-radius: 20px !important;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.5) !important;
    padding: 1.5rem !important;
    margin-bottom: 1.5rem !important;
}

/* Header styling */
h1 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem !important;
    font-weight: 800 !important;
    text-align: center;
    margin-bottom: 0.5rem !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

h2 {
    color: #4a5568 !important;
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    margin-bottom: 1rem !important;
}

/* Button styling */
.gr-button-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    padding: 0.8rem 2rem !important;
    border-radius: 12px !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
    transition: all 0.3s ease !important;
}

.gr-button-primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5) !important;
}

/* Input fields styling */
.gr-input, .gr-text-input, .gr-slider, .gr-dropdown {
    border-radius: 10px !important;
    border: 2px solid #e2e8f0 !important;
    background: white !important;
    transition: all 0.3s ease !important;
}

.gr-input:focus, .gr-text-input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}

/* Accordion styling */
.gr-accordion {
    background: rgba(255, 255, 255, 0.8) !important;
    border-radius: 12px !important;
    border: 1px solid rgba(102, 126, 234, 0.2) !important;
    overflow: hidden !important;
}

/* Gallery styling */
.gr-gallery {
    border-radius: 12px !important;
    overflow: hidden !important;
}

/* Prompt info boxes */
.prompt-info-box {
    padding: 1.5rem;
    border-radius: 12px;
    margin: 1rem 0;
    animation: fadeIn 0.5s ease;
}

.prompt-info-box h3 {
    margin: 0 0 0.75rem 0;
    font-size: 1.2rem;
    font-weight: 600;
}

.prompt-info-box p {
    margin: 0.5rem 0;
    line-height: 1.6;
}

.prompt-info-box.success {
    background: linear-gradient(135deg, #d4f4dd 0%, #e3f9e5 100%);
    border-left: 4px solid #48bb78;
}

.prompt-info-box.warning {
    background: linear-gradient(135deg, #fef5e7 0%, #fff9ec 100%);
    border-left: 4px solid #f6ad55;
}

.prompt-info-box.error {
    background: linear-gradient(135deg, #fed7d7 0%, #fee5e5 100%);
    border-left: 4px solid #fc8181;
}

.prompt-info-box.default {
    background: linear-gradient(135deg, #e6f3ff 0%, #f0f7ff 100%);
    border-left: 4px solid #667eea;
}

.prompt-info-box .note {
    font-size: 0.9rem;
    color: #718096;
    font-style: italic;
}

/* Checkbox styling */
.gr-checkbox {
    background: white !important;
    border-radius: 8px !important;
    padding: 0.5rem !important;
}

/* Token input field */
input[type="password"] {
    font-family: monospace !important;
    letter-spacing: 0.05em !important;
}

/* Info badges */
.gr-markdown p {
    color: #4a5568;
    line-height: 1.6;
}

.gr-markdown a {
    color: #667eea !important;
    text-decoration: none !important;
    font-weight: 500 !important;
    transition: color 0.3s ease !important;
}

.gr-markdown a:hover {
    color: #764ba2 !important;
    text-decoration: underline !important;
}

/* Animation */
@keyframes fadeIn {
    from {
        opacity: 0;
        transform: translateY(10px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

/* Slider styling */
.gr-slider input[type="range"] {
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
}

/* Group styling */
.gr-group {
    background: rgba(249, 250, 251, 0.8) !important;
    border-radius: 12px !important;
    padding: 1rem !important;
    margin-top: 1rem !important;
}

/* Loading spinner customization */
.gr-loading {
    color: #667eea !important;
}

/* Example buttons */
.gr-examples button {
    background: white !important;
    border: 2px solid #e2e8f0 !important;
    border-radius: 8px !important;
    padding: 0.5rem 1rem !important;
    transition: all 0.3s ease !important;
}

.gr-examples button:hover {
    border-color: #667eea !important;
    background: rgba(102, 126, 234, 0.05) !important;
}
"""
400
+
401
# UI definition: inputs and advanced settings in the left column; gallery,
# prompt-status card and prompt-enhancement controls in the right column.
# NOTE(review): the nesting below was reconstructed from component order
# because the source had lost its indentation — confirm against the layout
# you expect.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Qwen-Image-Edit Lightning")
    gr.Markdown("✨ **Ultra-fast 8-step image editing with AI-powered prompt enhancement**")
    gr.Markdown("🔐 **Secure prompt rewriting with your [Hugging Face token](https://huggingface.co/settings/tokens)**")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                input_image = gr.Image(
                    label="📸 Input Image",
                    type="pil",
                    elem_classes="gr-box"
                )
                prompt = gr.Text(
                    label="✏️ Edit Instruction",
                    placeholder="e.g. Add a dog to the right side, change the sky to sunset...",
                    lines=3,
                    elem_classes="gr-box"
                )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0
                )
                randomize_seed = gr.Checkbox(label="🎲 Randomize Seed", value=True)

                with gr.Row():
                    # NOTE(review): the UI default (4.0) differs from infer's
                    # own default of 1.0 — confirm which is intended for the
                    # 8-step LoRA.
                    true_guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1.0,
                        maximum=5.0,
                        step=0.1,
                        value=4.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=4,
                        maximum=16,
                        step=1,
                        value=8
                    )

                num_images_per_prompt = gr.Slider(
                    label="Images per Prompt",
                    minimum=1,
                    maximum=4,
                    step=1,
                    value=1
                )

            run_button = gr.Button("🚀 Generate Edit", variant="primary", size="lg")

        with gr.Column(scale=1):
            result = gr.Gallery(
                label="🖼️ Output Images",
                show_label=True,
                columns=2,
                rows=2,
                elem_classes="gr-box"
            )

            # Prompt display component; hidden until a run is triggered.
            prompt_info = gr.HTML(visible=False)

            with gr.Group():
                rewrite_toggle = gr.Checkbox(
                    label="🤖 Enable AI Prompt Enhancement",
                    value=False,
                    interactive=True
                )
                hf_token_input = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Your token is secure and only used for API calls. Get yours from HuggingFace settings.",
                    elem_classes="gr-box"
                )

            def toggle_token_visibility(checked):
                """Show the token field only while prompt enhancement is enabled."""
                return gr.update(visible=checked)

            rewrite_toggle.change(
                toggle_token_visibility,
                inputs=[rewrite_toggle],
                outputs=[hf_token_input]
            )

    # Examples section: clicking an entry fills the edit-instruction box.
    gr.Examples(
        examples=examples,
        inputs=prompt,
        label="💡 Example Prompts"
    )

    # Both the button and pressing Enter in the prompt box run inference.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt
        ],
        outputs=[result, seed, prompt_info]
    )

    # Show prompt info box after processing
    def set_prompt_visible():
        """Reveal the prompt-status card."""
        return gr.update(visible=True)

    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )

if __name__ == "__main__":
    demo.launch()