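"""
Gradio Space: ultra-fast image editing with Qwen-Image-Edit.

The app loads the Qwen/Qwen-Image-Edit diffusion pipeline, fuses the
lightx2v Qwen-Image-Lightning LoRA for 8-step inference, and optionally
rewrites the user's edit instruction with a Qwen3 chat model through the
Hugging Face InferenceClient (using a user-supplied HF token).
"""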
import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
from diffusers.utils import is_xformers_available
import os
import base64
import json
from huggingface_hub import InferenceClient
import logging
#############################
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
#############################
def get_caption_language(prompt):
    """Detects if the prompt contains Chinese characters."""
    ranges = [
        ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
    ]
    for char in prompt:
        if any(start <= char <= end for start, end in ranges):
            return 'zh'
    return 'en'
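# Example: get_caption_language("把猫换成狗") returns 'zh',
# while get_caption_language("Replace the cat with a dog") returns 'en'.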
def polish_prompt(original_prompt, system_prompt, hf_token):
    """
    Rewrites the prompt using a Hugging Face InferenceClient.
    Requires a user-provided HF token for API access.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt

    client = InferenceClient(
        provider="cerebras",
        api_key=hf_token,
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt}
    ]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=512,
        )
        polished_prompt = completion.choices[0].message.content.strip()
        # The system prompt asks the model for a JSON object {"Rewritten": "..."};
        # extract that field when possible, otherwise fall back to the raw text.
        try:
            parsed = json.loads(polished_prompt)
            if isinstance(parsed, dict) and "Rewritten" in parsed:
                polished_prompt = str(parsed["Rewritten"])
        except json.JSONDecodeError:
            pass
        polished_prompt = polished_prompt.replace("\n", " ").strip()
        return polished_prompt
    except Exception as e:
        logger.error(f"Error during Hugging Face API call: {e}")
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt
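# Note: the rewrite goes through the "cerebras" provider route of the HF
# Inference Providers API with the Qwen3-235B model named above; if that
# provider or model is not available for the supplied token, the call raises
# and the original prompt is returned unchanged.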
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.

## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure newly added elements or modifications align with the image's original context and art style.

## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.

### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserve the original structure and language—**do not translate** or alter style.

### Human Editing (e.g., change a person's face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.

### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:
  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`
  if applicable.

## 3. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
    "Rewritten": "..."
}
'''
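# Model setup. Note that from_pretrained downloads the full Qwen-Image-Edit
# checkpoint on first run (a large, multi-gigabyte download), and bfloat16
# inference assumes a reasonably recent CUDA GPU; on CPU the pipeline still
# loads but generation will be very slow.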
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)

# Load the Lightning LoRA weights for acceleration (enables 8-step inference)
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning", weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
)
pipe.fuse_lora()

if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()
else:
    print("xformers not available; using the default attention implementation.")
# This Space runs on ZeroGPU, so the inference call requests a GPU via the
# `spaces` decorator (the `spaces` import above is otherwise unused).
@spaces.GPU
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Edits the input image with Qwen-Image-Edit.
    An HF token is required only when prompt rewriting is enabled.
    """
    original_prompt = prompt  # Save original prompt for display
    negative_prompt = " "
    prompt_info = ""  # Initialize info text
    # Handle prompt rewriting with status messages
    if rewrite_prompt:
        if not hf_token.strip():
            gr.Warning("HF Token is required for prompt rewriting but was not provided!")
            prompt_info = f"""<div class="prompt-info-box warning">
                <h3>⚠️ Prompt Rewriting Skipped</h3>
                <p><strong>Original:</strong> {original_prompt}</p>
                <p class="note">HF Token required for enhancement</p>
            </div>"""
            rewritten_prompt = original_prompt
        else:
            try:
                rewritten_prompt = polish_prompt(original_prompt, SYSTEM_PROMPT_EDIT, hf_token)
                prompt_info = f"""<div class="prompt-info-box success">
                    <h3>✨ Enhanced Successfully</h3>
                    <p><strong>Original:</strong> {original_prompt}</p>
                    <p><strong>Enhanced:</strong> {rewritten_prompt}</p>
                </div>"""
            except Exception as e:
                gr.Warning(f"Prompt rewriting failed: {str(e)}")
                rewritten_prompt = original_prompt
                prompt_info = f"""<div class="prompt-info-box error">
                    <h3>❌ Enhancement Failed</h3>
                    <p><strong>Original:</strong> {original_prompt}</p>
                    <p class="note">Error: {str(e)}</p>
                </div>"""
    else:
        rewritten_prompt = original_prompt
        prompt_info = f"""<div class="prompt-info-box default">
            <h3>📝 Original Prompt</h3>
            <p>{original_prompt}</p>
        </div>"""
    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images

    return edited_images, seed, prompt_info
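# MAX_SEED is referenced inside infer() and by the seed slider below; it only
# needs to exist before the first call, so defining it at module level here,
# after the function body, still works.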
MAX_SEED = np.iinfo(np.int32).max

examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.""",
]
# Custom CSS for enhanced visual design
custom_css = """
/* Gradient background */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #fecfef 75%, #fecfef 100%);
    min-height: 100vh;
}

/* Main container styling */
.container {
    max-width: 1400px !important;
    margin: 0 auto !important;
    padding: 2rem !important;
}

/* Card-like sections */
.gr-box {
    background: rgba(255, 255, 255, 0.95) !important;
    backdrop-filter: blur(10px) !important;
    border-radius: 20px !important;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.5) !important;
    padding: 1.5rem !important;
    margin-bottom: 1.5rem !important;
}

/* Header styling */
h1 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem !important;
    font-weight: 800 !important;
    text-align: center;
    margin-bottom: 0.5rem !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

h2 {
    color: #4a5568 !important;
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    margin-bottom: 1rem !important;
}

/* Button styling */
.gr-button-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    padding: 0.8rem 2rem !important;
    border-radius: 12px !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
    transition: all 0.3s ease !important;
}

.gr-button-primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5) !important;
}

/* Input fields styling */
.gr-input, .gr-text-input, .gr-slider, .gr-dropdown {
    border-radius: 10px !important;
    border: 2px solid #e2e8f0 !important;
    background: white !important;
    transition: all 0.3s ease !important;
}

.gr-input:focus, .gr-text-input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}

/* Accordion styling */
.gr-accordion {
    background: rgba(255, 255, 255, 0.8) !important;
    border-radius: 12px !important;
    border: 1px solid rgba(102, 126, 234, 0.2) !important;
    overflow: hidden !important;
}

/* Gallery styling */
.gr-gallery {
    border-radius: 12px !important;
    overflow: hidden !important;
}

/* Prompt info boxes */
.prompt-info-box {
    padding: 1.5rem;
    border-radius: 12px;
    margin: 1rem 0;
    animation: fadeIn 0.5s ease;
}

.prompt-info-box h3 {
    margin: 0 0 0.75rem 0;
    font-size: 1.2rem;
    font-weight: 600;
}

.prompt-info-box p {
    margin: 0.5rem 0;
    line-height: 1.6;
}

.prompt-info-box.success {
    background: linear-gradient(135deg, #d4f4dd 0%, #e3f9e5 100%);
    border-left: 4px solid #48bb78;
}

.prompt-info-box.warning {
    background: linear-gradient(135deg, #fef5e7 0%, #fff9ec 100%);
    border-left: 4px solid #f6ad55;
}

.prompt-info-box.error {
    background: linear-gradient(135deg, #fed7d7 0%, #fee5e5 100%);
    border-left: 4px solid #fc8181;
}

.prompt-info-box.default {
    background: linear-gradient(135deg, #e6f3ff 0%, #f0f7ff 100%);
    border-left: 4px solid #667eea;
}

.prompt-info-box .note {
    font-size: 0.9rem;
    color: #718096;
    font-style: italic;
}

/* Checkbox styling */
.gr-checkbox {
    background: white !important;
    border-radius: 8px !important;
    padding: 0.5rem !important;
}

/* Token input field */
input[type="password"] {
    font-family: monospace !important;
    letter-spacing: 0.05em !important;
}

/* Info badges */
.gr-markdown p {
    color: #4a5568;
    line-height: 1.6;
}

.gr-markdown a {
    color: #667eea !important;
    text-decoration: none !important;
    font-weight: 500 !important;
    transition: color 0.3s ease !important;
}

.gr-markdown a:hover {
    color: #764ba2 !important;
    text-decoration: underline !important;
}

/* Animation */
@keyframes fadeIn {
    from {
        opacity: 0;
        transform: translateY(10px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

/* Slider styling */
.gr-slider input[type="range"] {
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
}

/* Group styling */
.gr-group {
    background: rgba(249, 250, 251, 0.8) !important;
    border-radius: 12px !important;
    padding: 1rem !important;
    margin-top: 1rem !important;
}

/* Loading spinner customization */
.gr-loading {
    color: #667eea !important;
}

/* Example buttons */
.gr-examples button {
    background: white !important;
    border: 2px solid #e2e8f0 !important;
    border-radius: 8px !important;
    padding: 0.5rem 1rem !important;
    transition: all 0.3s ease !important;
}

.gr-examples button:hover {
    border-color: #667eea !important;
    background: rgba(102, 126, 234, 0.05) !important;
}
"""
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Nano-Banana")
    gr.Markdown("✨ **Ultra-fast 8-step image editing with AI-powered prompt enhancement**")
    gr.Markdown("🔐 **Secure prompt rewriting with your [Hugging Face token](https://huggingface.co/settings/tokens)**")

    # Center-align the badge
    gr.HTML("""
    <div style="display: flex; justify-content: center; align-items: center; gap: 20px; margin: 20px 0;">
        <a href="https://huggingface.co/spaces/Heartsync/Nano-Banana" target="_blank">
            <img src="https://img.shields.io/static/v1?label=OPEN%20NANO-BANANA&message=Image%20EDITOR&color=%230000ff&labelColor=%23800080&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
        </a>
    </div>
    """)
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                input_image = gr.Image(
                    label="📸 Input Image",
                    type="pil",
                    elem_classes="gr-box"
                )
                prompt = gr.Text(
                    label="✏️ Edit Instruction",
                    placeholder="e.g. Add a dog to the right side, change the sky to sunset...",
                    lines=3,
                    elem_classes="gr-box"
                )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0
                )
                randomize_seed = gr.Checkbox(label="🎲 Randomize Seed", value=True)
                with gr.Row():
                    true_guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1.0,
                        maximum=5.0,
                        step=0.1,
                        value=4.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=4,
                        maximum=16,
                        step=1,
                        value=8
                    )
                num_images_per_prompt = gr.Slider(
                    label="Images per Prompt",
                    minimum=1,
                    maximum=4,
                    step=1,
                    value=1
                )

            run_button = gr.Button("🚀 Generate Edit", variant="primary", size="lg")

        with gr.Column(scale=1):
            result = gr.Gallery(
                label="🖼️ Output Images",
                show_label=True,
                columns=2,
                rows=2,
                elem_classes="gr-box"
            )
            # Prompt display component
            prompt_info = gr.HTML(visible=False)

            with gr.Group():
                rewrite_toggle = gr.Checkbox(
                    label="🤖 Enable AI Prompt Enhancement",
                    value=False,
                    interactive=True
                )
                hf_token_input = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Your token is secure and only used for API calls. Get yours from HuggingFace settings.",
                    elem_classes="gr-box"
                )

    def toggle_token_visibility(checked):
        return gr.update(visible=checked)

    rewrite_toggle.change(
        toggle_token_visibility,
        inputs=[rewrite_toggle],
        outputs=[hf_token_input]
    )

    # Examples section
    gr.Examples(
        examples=examples,
        inputs=prompt,
        label="💡 Example Prompts"
    )

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt
        ],
        outputs=[result, seed, prompt_info]
    )

    # Show prompt info box after processing
    def set_prompt_visible():
        return gr.update(visible=True)

    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )

if __name__ == "__main__":
    demo.launch()