Nano-Banana-API / app.py
aiqtech's picture
Update app.py
bb8a67b verified
raw
history blame
21.3 kB
import gradio as gr
from google import genai
from google.genai import types
import os
from typing import Optional, List
from huggingface_hub import whoami
from PIL import Image
from io import BytesIO
import tempfile
# --- Google Gemini API Configuration ---
# Use GEMINI_API_KEY if available, otherwise fall back to GOOGLE_API_KEY
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
API_KEY = GEMINI_API_KEY or GOOGLE_API_KEY
if not API_KEY:
    # Fail fast at import time: the app cannot do anything without a key.
    raise ValueError("Neither GEMINI_API_KEY nor GOOGLE_API_KEY environment variable is set.")
# Shared google-genai client used by all generation calls in this module.
client = genai.Client(
    api_key=API_KEY,
)
# Image-capable Gemini preview model used for all generation requests.
GEMINI_MODEL_NAME = 'gemini-2.5-flash-image-preview'
def verify_pro_status(token: Optional[gr.OAuthToken]) -> bool:
    """Verifies if the user is a Hugging Face PRO user or part of an enterprise org."""
    if not token:
        # No OAuth token means the visitor is not logged in.
        return False
    try:
        account = whoami(token=token.token)
        # PRO flag on the account itself grants access directly.
        if account.get("isPro", False):
            return True
        # Otherwise, membership in any enterprise organization also qualifies.
        return any(
            membership.get("isEnterprise", False)
            for membership in account.get("orgs", [])
        )
    except Exception as e:
        # Treat any lookup failure as "not verified" rather than crashing.
        print(f"Could not verify user's PRO/Enterprise status: {e}")
        return False
def _extract_image_data_from_response(response) -> Optional[bytes]:
"""Helper to extract image data from the model's response."""
# Debug: Print response structure
print(f"Response type: {type(response)}")
# Try multiple ways to extract image data
# Method 1: Direct image attribute
if hasattr(response, 'image'):
print("Found response.image")
return response.image
# Method 2: Images array
if hasattr(response, 'images') and response.images:
print(f"Found response.images with {len(response.images)} images")
return response.images[0]
# Method 3: Candidates with parts
if hasattr(response, 'candidates') and response.candidates:
print(f"Found {len(response.candidates)} candidates")
for i, candidate in enumerate(response.candidates):
print(f"Candidate {i}: {type(candidate)}")
# Check for content.parts
if hasattr(candidate, 'content'):
print(f" Has content: {type(candidate.content)}")
if hasattr(candidate.content, 'parts') and candidate.content.parts:
print(f" Has {len(candidate.content.parts)} parts")
for j, part in enumerate(candidate.content.parts):
print(f" Part {j}: {type(part)}")
# Check for inline_data
if hasattr(part, 'inline_data'):
print(f" Has inline_data")
if hasattr(part.inline_data, 'data'):
print(f" Found image data!")
return part.inline_data.data
if hasattr(part.inline_data, 'blob'):
print(f" Found blob data!")
return part.inline_data.blob
# Check for blob directly
if hasattr(part, 'blob'):
print(f" Has blob")
return part.blob
# Check for data directly
if hasattr(part, 'data'):
print(f" Has data")
return part.data
# Method 4: Text response (might need different API configuration)
if hasattr(response, 'text'):
print(f"Response has text but no image: {response.text[:200] if response.text else 'Empty'}")
print("No image data found in response")
return None
def run_single_image_logic(prompt: str, image_path: Optional[str] = None, progress=gr.Progress()) -> str:
    """Handles text-to-image or single image-to-image using Google Gemini.

    Args:
        prompt: User prompt describing the desired image or edit.
        image_path: Optional path to an input image. When given, the call is
            treated as an image-edit request; otherwise pure text-to-image.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Filesystem path to a temporary PNG containing the generated image.

    Raises:
        gr.Error: If the API call fails or no image data is returned.
    """
    try:
        progress(0.2, desc="๐ŸŽจ ์ค€๋น„ ์ค‘...")
        contents = []
        if image_path:
            # Image-to-image: source image first, then the edit instruction.
            contents.append(Image.open(image_path))
            contents.append(f"Edit this image: {prompt}")
        else:
            # Text-to-image generation.
            contents.append(f"Generate an image: {prompt}")
        progress(0.5, desc="โœจ ์ƒ์„ฑ ์ค‘...")
        # The google-genai client expects `config=GenerateContentConfig(...)`.
        # The previous `generation_config=types.GenerationConfig(...)` keyword
        # is not accepted by `client.models.generate_content` and made every
        # call fail with a TypeError.
        response = client.models.generate_content(
            model=GEMINI_MODEL_NAME,
            contents=contents,
            config=types.GenerateContentConfig(
                temperature=1.0,
                max_output_tokens=8192,
            ),
        )
        progress(0.8, desc="๐Ÿ–ผ๏ธ ๋งˆ๋ฌด๋ฆฌ ์ค‘...")
        image_data = _extract_image_data_from_response(response)
        if not image_data and hasattr(client.models, 'generate_images'):
            # Best-effort fallback to the dedicated image endpoint when the
            # chat-style call returned no image payload. This endpoint takes
            # `config=GenerateImagesConfig(...)` rather than an `n=` keyword.
            print("Trying generate_images method...")
            response = client.models.generate_images(
                model=GEMINI_MODEL_NAME,
                prompt=prompt,
                config=types.GenerateImagesConfig(number_of_images=1),
            )
            generated = getattr(response, 'generated_images', None)
            if generated:
                image_data = generated[0].image.image_bytes
            elif getattr(response, 'images', None):
                image_data = response.images[0]
        if not image_data:
            raise ValueError("No image data found in the model response. The API might not support image generation or the model name might be incorrect.")
        # Save the generated image to a temporary file to return its path.
        pil_image = Image.open(BytesIO(image_data))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            pil_image.save(tmpfile.name)
        progress(1.0, desc="โœ… ์™„๋ฃŒ!")
        return tmpfile.name
    except Exception as e:
        print(f"Error details: {e}")
        print(f"Error type: {type(e)}")
        raise gr.Error(f"์ด๋ฏธ์ง€ ์ƒ์„ฑ ์‹คํŒจ: {e}")
def run_multi_image_logic(prompt: str, images: List[str], progress=gr.Progress()) -> str:
    """Handles multi-image editing by sending a list of images and a prompt.

    Args:
        prompt: Instruction describing how to combine/edit the images.
        images: Image paths from a Gradio Gallery; items may also arrive as
            (path, caption) tuples, which are unwrapped here.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Filesystem path to a temporary PNG containing the generated image.

    Raises:
        gr.Error: If no images were provided or generation fails.
    """
    if not images:
        raise gr.Error("'์—ฌ๋Ÿฌ ์ด๋ฏธ์ง€' ํƒญ์—์„œ ์ตœ์†Œ ํ•œ ๊ฐœ์˜ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.")
    try:
        progress(0.2, desc="๐ŸŽจ ์ด๋ฏธ์ง€ ์ค€๋น„ ์ค‘...")
        contents = []
        for image_path in images:
            # Gradio Gallery entries can be (path, caption) pairs.
            if isinstance(image_path, (list, tuple)):
                image_path = image_path[0]
            contents.append(Image.open(image_path))
        contents.append(f"Combine/edit these images: {prompt}")
        progress(0.5, desc="โœจ ์ƒ์„ฑ ์ค‘...")
        # Same fix as run_single_image_logic: generate_content takes
        # `config=GenerateContentConfig(...)`, not `generation_config=`.
        response = client.models.generate_content(
            model=GEMINI_MODEL_NAME,
            contents=contents,
            config=types.GenerateContentConfig(
                temperature=1.0,
                max_output_tokens=8192,
            ),
        )
        progress(0.8, desc="๐Ÿ–ผ๏ธ ๋งˆ๋ฌด๋ฆฌ ์ค‘...")
        image_data = _extract_image_data_from_response(response)
        if not image_data:
            raise ValueError("No image data found in the model response. The API might not support multi-image generation.")
        pil_image = Image.open(BytesIO(image_data))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            pil_image.save(tmpfile.name)
        progress(1.0, desc="โœ… ์™„๋ฃŒ!")
        return tmpfile.name
    except Exception as e:
        print(f"Multi-image error details: {e}")
        raise gr.Error(f"์ด๋ฏธ์ง€ ์ƒ์„ฑ ์‹คํŒจ: {e}")
# --- Gradio App UI ---
# Custom CSS for the Blocks layout: gradient header, card panels, tab/button
# styling, input focus states, dark-mode overrides, a bounce animation for
# emoji, and responsive tweaks for narrow screens. The string is passed to
# gr.Blocks(css=...) below, so its content must stay valid CSS.
css = '''
/* Header Styling */
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 2rem;
border-radius: 1rem;
margin-bottom: 2rem;
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
}
.header-title {
font-size: 2.5rem !important;
font-weight: bold;
color: white;
text-align: center;
margin: 0 !important;
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}
.header-subtitle {
color: rgba(255,255,255,0.9);
text-align: center;
margin-top: 0.5rem !important;
font-size: 1.1rem;
}
/* Card Styling */
.card {
background: white;
border-radius: 1rem;
padding: 1.5rem;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
border: 1px solid rgba(0,0,0,0.05);
}
.dark .card {
background: #1f2937;
border: 1px solid #374151;
}
/* Tab Styling */
.tabs {
border-radius: 0.5rem;
overflow: hidden;
margin-bottom: 1rem;
}
.tabitem {
padding: 1rem !important;
}
button.selected {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
}
/* Button Styling */
.generate-btn {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
color: white !important;
font-size: 1.1rem !important;
font-weight: 600 !important;
padding: 0.8rem 2rem !important;
border-radius: 0.5rem !important;
cursor: pointer !important;
transition: all 0.3s ease !important;
width: 100% !important;
margin-top: 1rem !important;
}
.generate-btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 10px 20px rgba(102, 126, 234, 0.4) !important;
}
.use-btn {
background: linear-gradient(135deg, #10b981 0%, #059669 100%) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
padding: 0.6rem 1.5rem !important;
border-radius: 0.5rem !important;
cursor: pointer !important;
transition: all 0.3s ease !important;
width: 100% !important;
}
.use-btn:hover {
transform: translateY(-1px) !important;
box-shadow: 0 5px 15px rgba(16, 185, 129, 0.4) !important;
}
/* Input Styling */
.prompt-input textarea {
border-radius: 0.5rem !important;
border: 2px solid #e5e7eb !important;
padding: 0.8rem !important;
font-size: 1rem !important;
transition: border-color 0.3s ease !important;
}
.prompt-input textarea:focus {
border-color: #667eea !important;
outline: none !important;
}
.dark .prompt-input textarea {
border-color: #374151 !important;
background: #1f2937 !important;
}
/* Image Output Styling */
#output {
border-radius: 0.5rem !important;
overflow: hidden !important;
box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
}
/* Progress Bar Styling */
.progress-bar {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
}
/* Examples Styling */
.examples {
background: #f9fafb;
border-radius: 0.5rem;
padding: 1rem;
margin-top: 1rem;
}
.dark .examples {
background: #1f2937;
}
/* Pro Message Styling */
.pro-message {
background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
border-radius: 1rem;
padding: 2rem;
text-align: center;
border: 2px solid #f59e0b;
}
.dark .pro-message {
background: linear-gradient(135deg, #7c2d12 0%, #92400e 100%);
border-color: #f59e0b;
}
/* Emoji Animations */
@keyframes bounce {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-10px); }
}
.emoji-icon {
display: inline-block;
animation: bounce 2s infinite;
}
/* Responsive Design */
@media (max-width: 768px) {
.header-title {
font-size: 2rem !important;
}
.main-container {
padding: 1rem !important;
}
}
'''
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    # Header banner.
    gr.HTML('''
    <div class="main-header">
    <h1 class="header-title">
    ๐ŸŒ Real Nano Banana
    </h1>
    <p class="header-subtitle">
    Google Gemini 2.5 Flash Image Preview๋กœ ๊ตฌ๋™๋˜๋Š” AI ์ด๋ฏธ์ง€ ์ƒ์„ฑ๊ธฐ
    </p>
    </div>
    ''')
    # PRO-only notice shown to every visitor (even before login).
    gr.HTML('''
    <div style="background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
    border-radius: 0.5rem; padding: 1rem; margin-bottom: 1.5rem;
    border-left: 4px solid #f59e0b;">
    <p style="margin: 0; color: #92400e; font-weight: 600;">
    ๐ŸŒŸ ์ด ์ŠคํŽ˜์ด์Šค๋Š” Hugging Face PRO ์‚ฌ์šฉ์ž ์ „์šฉ์ž…๋‹ˆ๋‹ค.
    <a href="https://huggingface.co/pro" target="_blank"
    style="color: #dc2626; text-decoration: underline;">
    PRO ๊ตฌ๋…ํ•˜๊ธฐ
    </a>
    </p>
    </div>
    ''')
    # `pro_message` is shown (with upsell HTML) to non-PRO users;
    # `main_interface` is revealed only after PRO verification in control_access.
    pro_message = gr.Markdown(visible=False)
    main_interface = gr.Column(visible=False, elem_classes="main-container")
    with main_interface:
        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML('<div class="card">')
                # Mode Selection: single-image vs multi-image input.
                gr.HTML('<h3 style="margin-top: 0;">๐Ÿ“ธ ๋ชจ๋“œ ์„ ํƒ</h3>')
                # Tracks which tab is active; updated by the .select handlers below.
                active_tab_state = gr.State(value="single")
                with gr.Tabs(elem_classes="tabs") as tabs:
                    with gr.TabItem("๐Ÿ–ผ๏ธ ๋‹จ์ผ ์ด๋ฏธ์ง€", id="single") as single_tab:
                        # Optional source image; left empty for text-to-image.
                        image_input = gr.Image(
                            type="filepath",
                            label="์ž…๋ ฅ ์ด๋ฏธ์ง€",
                            elem_classes="image-input"
                        )
                        gr.HTML('''
                        <p style="text-align: center; color: #6b7280; font-size: 0.9rem; margin-top: 0.5rem;">
                        ๐Ÿ’ก ํ…์ŠคํŠธโ†’์ด๋ฏธ์ง€ ์ƒ์„ฑ์€ ๋น„์›Œ๋‘์„ธ์š”
                        </p>
                        ''')
                    with gr.TabItem("๐ŸŽจ ๋‹ค์ค‘ ์ด๋ฏธ์ง€", id="multiple") as multi_tab:
                        # Multiple reference images for combine/edit requests.
                        gallery_input = gr.Gallery(
                            label="์ž…๋ ฅ ์ด๋ฏธ์ง€๋“ค",
                            file_types=["image"],
                            elem_classes="gallery-input"
                        )
                        gr.HTML('''
                        <p style="text-align: center; color: #6b7280; font-size: 0.9rem; margin-top: 0.5rem;">
                        ๐Ÿ’ก ์—ฌ๋Ÿฌ ์ด๋ฏธ์ง€๋ฅผ ๋“œ๋ž˜๊ทธ ์•ค ๋“œ๋กญํ•˜์„ธ์š”
                        </p>
                        ''')
                # Prompt Input
                gr.HTML('<h3>โœ๏ธ ํ”„๋กฌํ”„ํŠธ</h3>')
                prompt_input = gr.Textbox(
                    label="",
                    info="AI์—๊ฒŒ ์›ํ•˜๋Š” ๊ฒƒ์„ ์„ค๋ช…ํ•˜์„ธ์š”",
                    placeholder="์˜ˆ: ๋ง›์žˆ์–ด ๋ณด์ด๋Š” ํ”ผ์ž, ์šฐ์ฃผ๋ฅผ ๋ฐฐ๊ฒฝ์œผ๋กœ ํ•œ ๊ณ ์–‘์ด, ๋ฏธ๋ž˜์ ์ธ ๋„์‹œ ํ’๊ฒฝ...",
                    lines=3,
                    elem_classes="prompt-input"
                )
                # Generate Button
                generate_button = gr.Button(
                    "๐Ÿš€ ์ƒ์„ฑํ•˜๊ธฐ",
                    variant="primary",
                    elem_classes="generate-btn"
                )
                # Example prompts (fill prompt_input when clicked).
                with gr.Accordion("๐Ÿ’ก ์˜ˆ์ œ ํ”„๋กฌํ”„ํŠธ", open=False):
                    gr.Examples(
                        examples=[
                            ["์น˜์ฆˆ๊ฐ€ ๋Š˜์–ด๋‚˜๋Š” ๋ง›์žˆ์–ด ๋ณด์ด๋Š” ํ”ผ์ž"],
                            ["์šฐ์ฃผ๋ณต์„ ์ž…์€ ๊ณ ์–‘์ด๊ฐ€ ๋‹ฌ ํ‘œ๋ฉด์„ ๊ฑท๊ณ  ์žˆ๋Š” ๋ชจ์Šต"],
                            ["๋„ค์˜จ ๋ถˆ๋น›์ด ๋น›๋‚˜๋Š” ์‚ฌ์ด๋ฒ„ํŽ‘ํฌ ๋„์‹œ์˜ ์•ผ๊ฒฝ"],
                            ["๋ด„๋‚  ๋ฒš๊ฝƒ์ด ๋งŒ๊ฐœํ•œ ์ผ๋ณธ ์ •์›"],
                            ["ํŒํƒ€์ง€ ์„ธ๊ณ„์˜ ๋งˆ๋ฒ•์‚ฌ ํƒ‘"],
                        ],
                        inputs=prompt_input
                    )
                gr.HTML('</div>')
            with gr.Column(scale=1):
                gr.HTML('<div class="card">')
                gr.HTML('<h3 style="margin-top: 0;">๐ŸŽจ ์ƒ์„ฑ ๊ฒฐ๊ณผ</h3>')
                # Generated image output; not editable by the user.
                output_image = gr.Image(
                    label="",
                    interactive=False,
                    elem_id="output"
                )
                # Hidden until a generation succeeds; feeds the result back
                # into image_input for iterative editing.
                use_image_button = gr.Button(
                    "โ™ป๏ธ ์ด ์ด๋ฏธ์ง€๋ฅผ ๋‹ค์Œ ํŽธ์ง‘์— ์‚ฌ์šฉ",
                    elem_classes="use-btn",
                    visible=False
                )
                # Tips panel.
                gr.HTML('''
                <div style="background: #f0f9ff; border-radius: 0.5rem; padding: 1rem; margin-top: 1rem;">
                <h4 style="margin-top: 0; color: #0369a1;">๐Ÿ’ก ํŒ</h4>
                <ul style="margin: 0; padding-left: 1.5rem; color: #0c4a6e;">
                <li>๊ตฌ์ฒด์ ์ด๊ณ  ์ƒ์„ธํ•œ ํ”„๋กฌํ”„ํŠธ๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”</li>
                <li>์ƒ์„ฑ๋œ ์ด๋ฏธ์ง€๋ฅผ ์žฌ์‚ฌ์šฉํ•˜์—ฌ ๋ฐ˜๋ณต์ ์œผ๋กœ ๊ฐœ์„ ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค</li>
                <li>๋‹ค์ค‘ ์ด๋ฏธ์ง€ ๋ชจ๋“œ๋กœ ์—ฌ๋Ÿฌ ์ฐธ์กฐ ์ด๋ฏธ์ง€๋ฅผ ๊ฒฐํ•ฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค</li>
                </ul>
                </div>
                ''')
                gr.HTML('</div>')
    # Footer
    gr.HTML('''
    <div style="text-align: center; margin-top: 2rem; padding: 1rem;
    border-top: 1px solid #e5e7eb;">
    <p style="color: #6b7280;">
    Made with ๐Ÿ’œ by Hugging Face PRO | Powered by Google Gemini 2.5 Flash
    </p>
    </div>
    ''')
    # HF OAuth login; supplies the OAuthProfile/OAuthToken that Gradio
    # injects into handlers below.
    login_button = gr.LoginButton()

    # --- Event Handlers ---
    def unified_generator(
        prompt: str,
        single_image: Optional[str],
        multi_images: Optional[List[str]],
        active_tab: str,
        oauth_token: Optional[gr.OAuthToken] = None,
    ):
        """Route a generate click to the single- or multi-image pipeline.

        Access is re-verified on every click so a non-PRO user cannot invoke
        the API even if the UI were visible. Returns the generated image path
        and an update making the "reuse image" button visible.
        """
        if not verify_pro_status(oauth_token):
            raise gr.Error("์•ก์„ธ์Šค ๊ฑฐ๋ถ€: ์ด ์„œ๋น„์Šค๋Š” PRO ์‚ฌ์šฉ์ž ์ „์šฉ์ž…๋‹ˆ๋‹ค.")
        if not prompt:
            raise gr.Error("ํ”„๋กฌํ”„ํŠธ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.")
        # Fall back to the single-image path when the multi tab has no images.
        if active_tab == "multiple" and multi_images:
            result = run_multi_image_logic(prompt, multi_images)
        else:
            result = run_single_image_logic(prompt, single_image)
        return result, gr.update(visible=True)

    # Keep active_tab_state in sync with the selected tab.
    single_tab.select(lambda: "single", None, active_tab_state)
    multi_tab.select(lambda: "multiple", None, active_tab_state)
    generate_button.click(
        unified_generator,
        inputs=[prompt_input, image_input, gallery_input, active_tab_state],
        outputs=[output_image, use_image_button],
    )
    # Feed the generated image back as the next single-image input and hide
    # the button again until the next successful generation.
    use_image_button.click(
        lambda img: (img, gr.update(visible=False)),
        inputs=[output_image],
        outputs=[image_input, use_image_button]
    )

    # --- Access Control Logic ---
    def control_access(
        profile: Optional[gr.OAuthProfile] = None,
        oauth_token: Optional[gr.OAuthToken] = None
    ):
        """Show the main UI for verified PRO users, an upsell message otherwise.

        Returns visibility updates for (main_interface, pro_message). When no
        profile is present (logged out), both stay hidden.
        """
        if not profile:
            return gr.update(visible=False), gr.update(visible=False)
        if verify_pro_status(oauth_token):
            return gr.update(visible=True), gr.update(visible=False)
        else:
            message = '''
            <div class="pro-message">
            <h2>โœจ PRO ์‚ฌ์šฉ์ž ์ „์šฉ ๊ธฐ๋Šฅ</h2>
            <p style="font-size: 1.1rem; margin: 1rem 0;">
            ์ด ๊ฐ•๋ ฅํ•œ AI ์ด๋ฏธ์ง€ ์ƒ์„ฑ ๋„๊ตฌ๋Š” Hugging Face <strong>PRO</strong> ๋ฉค๋ฒ„ ์ „์šฉ์ž…๋‹ˆ๋‹ค.
            </p>
            <p style="margin: 1rem 0;">
            PRO ๊ตฌ๋…์œผ๋กœ ๋‹ค์Œ์„ ๋ˆ„๋ฆฌ์„ธ์š”:
            </p>
            <ul style="text-align: left; display: inline-block; margin: 1rem 0;">
            <li>๐Ÿš€ Google Gemini 2.5 Flash ๋ฌด์ œํ•œ ์•ก์„ธ์Šค</li>
            <li>โšก ๋น ๋ฅธ ์ด๋ฏธ์ง€ ์ƒ์„ฑ</li>
            <li>๐ŸŽจ ๊ณ ํ’ˆ์งˆ ๊ฒฐ๊ณผ๋ฌผ</li>
            <li>๐Ÿ”ง ๋‹ค์ค‘ ์ด๋ฏธ์ง€ ํŽธ์ง‘ ๊ธฐ๋Šฅ</li>
            </ul>
            <a href="https://huggingface.co/pro" target="_blank"
            style="display: inline-block; margin-top: 1rem; padding: 1rem 2rem;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white; text-decoration: none; border-radius: 0.5rem;
            font-weight: bold; font-size: 1.1rem;">
            ๐ŸŒŸ ์ง€๊ธˆ PRO ๋ฉค๋ฒ„ ๋˜๊ธฐ
            </a>
            </div>
            '''
        return gr.update(visible=False), gr.update(visible=True, value=message)

    # Run the access check on page load so visibility matches login state.
    demo.load(control_access, inputs=None, outputs=[main_interface, pro_message])
if __name__ == "__main__":
    # Blocks.queue returns the Blocks instance, so the launch call can chain.
    demo.queue(max_size=None, default_concurrency_limit=None).launch()