# nano-banana / app.py
import gradio as gr
import google.generativeai as genai
import os
from typing import Optional, List
from huggingface_hub import whoami
from PIL import Image
import tempfile
import io # Import io for handling in-memory binary streams
# --- Google Gemini API Configuration ---
# Set your Google API key as an environment variable
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable not set.")
genai.configure(api_key=GOOGLE_API_KEY)
# --- Define the correct model name ---
GEMINI_MODEL_NAME = 'gemini-2.5-flash-image-preview'
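# Note: this preview model returns generated images as raw bytes in an
# `inline_data` part of the response rather than as text, which is why the
# helpers below search the response parts for inline data. The model name may
# change once the preview graduates to a stable release.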
def verify_pro_status(token: Optional[gr.OAuthToken]) -> bool:
    """Verifies if the user is a Hugging Face PRO user or part of an enterprise org."""
    if not token:
        return False
    try:
        user_info = whoami(token=token.token)
        if user_info.get("isPro", False):
            return True
        orgs = user_info.get("orgs", [])
        if any(org.get("isEnterprise", False) for org in orgs):
            return True
        return False
    except Exception as e:
        print(f"Could not verify user's PRO/Enterprise status: {e}")
        return False
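# The `isPro` and `orgs[].isEnterprise` fields checked above reflect the payload
# returned by `huggingface_hub.whoami()` at the time of writing; if the Hub API
# ever renames them, this check will silently start returning False.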
# --- Backend Generation Functions ---
def run_single_image_logic(prompt: str, image_path: Optional[str] = None) -> str:
    """Handles text-to-image or single image-to-image generation using Google Gemini."""
    try:
        model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        contents = [prompt]
        if image_path:
            input_image = Image.open(image_path)
            contents.append(input_image)
        response = model.generate_content(contents)

        # Depending on the SDK version, the generated image bytes are exposed
        # either on response.parts or nested under response.candidates[i].content.parts.
        # Skip parts (e.g. text) that carry no inline image bytes.
        image_data = None
        if hasattr(response, 'parts') and response.parts:
            for part in response.parts:
                if hasattr(part, 'inline_data') and getattr(part.inline_data, 'data', None):
                    image_data = part.inline_data.data
                    break
        elif hasattr(response, 'candidates') and response.candidates:
            for candidate in response.candidates:
                if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
                    for part in candidate.content.parts:
                        if hasattr(part, 'inline_data') and getattr(part.inline_data, 'data', None):
                            image_data = part.inline_data.data
                            break
                if image_data:
                    break

        if not image_data:
            raise ValueError("No image data found in the model response.")

        # Save the generated image to a temporary file and return its path.
        pil_image = Image.open(io.BytesIO(image_data))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            pil_image.save(tmpfile.name)
        return tmpfile.name
    except Exception as e:
        raise gr.Error(f"Image generation failed: {e}")
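# Minimal usage sketch (illustrative, not executed here; assumes GOOGLE_API_KEY
# has been configured above and that the prompts/paths are placeholders):
#   out_path = run_single_image_logic("A watercolor fox in a misty forest")
#   edited_path = run_single_image_logic("Make it snowy", image_path=out_path)
# Both calls return the filesystem path of a temporary PNG.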
def run_multi_image_logic(prompt: str, images: List[str]) -> str:
    """
    Handles multi-image editing by sending a list of images and a prompt.
    """
    if not images:
        raise gr.Error("Please upload at least one image in the 'Multiple Images' tab.")
    try:
        model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        # gr.Gallery passes its value as a list of (filepath, caption) tuples,
        # hence the item[0] indexing. The prompt goes last in the contents list.
        contents = [Image.open(item[0]) for item in images]
        contents.append(prompt)
        response = model.generate_content(contents)

        # Same response parsing as run_single_image_logic: find the part that
        # carries the generated image as inline bytes.
        image_data = None
        if hasattr(response, 'parts') and response.parts:
            for part in response.parts:
                if hasattr(part, 'inline_data') and getattr(part.inline_data, 'data', None):
                    image_data = part.inline_data.data
                    break
        elif hasattr(response, 'candidates') and response.candidates:
            for candidate in response.candidates:
                if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
                    for part in candidate.content.parts:
                        if hasattr(part, 'inline_data') and getattr(part.inline_data, 'data', None):
                            image_data = part.inline_data.data
                            break
                if image_data:
                    break

        if not image_data:
            raise ValueError("No image data found in the model response.")

        # Save the generated image to a temporary file and return its path.
        pil_image = Image.open(io.BytesIO(image_data))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            pil_image.save(tmpfile.name)
        return tmpfile.name
    except Exception as e:
        raise gr.Error(f"Image generation failed: {e}")
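# Minimal usage sketch (illustrative only): gr.Gallery hands this function a
# list of (filepath, caption) tuples, e.g.
#   out_path = run_multi_image_logic(
#       "Blend these into one scene",
#       [("/tmp/a.png", None), ("/tmp/b.png", None)],
#   )
# The paths above are placeholders, not files that exist in this Space.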
# --- Gradio App UI ---
css = '''
#sub_title{margin-top: -35px !important}
.tab-wrapper{margin-bottom: -33px !important}
.tabitem{padding: 0px !important}
#output{margin-top: 25px}
.fillable{max-width: 980px !important}
.dark .progress-text {color: white}
'''
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    gr.HTML("<h1 style='text-align:center'>Image Generation with Google Gemini</h1>")
    gr.HTML("<h3 style='text-align:center'>Hugging Face PRO users can use Google's Gemini 2.5 Flash Image Preview on this Space. <a href='https://huggingface.co/pro' target='_blank'>Subscribe to PRO</a></h3>", elem_id="sub_title")
    pro_message = gr.Markdown(visible=False)
    main_interface = gr.Column(visible=False)
    with main_interface:
        with gr.Row():
            with gr.Column(scale=1):
                active_tab_state = gr.State(value="single")
                with gr.Tabs() as tabs:
                    with gr.TabItem("Single Image", id="single") as single_tab:
                        image_input = gr.Image(
                            type="filepath",
                            label="Input Image (Leave blank for text-to-image)"
                        )
                    with gr.TabItem("Multiple Images", id="multiple") as multi_tab:
                        gallery_input = gr.Gallery(
                            label="Input Images (drop all images here)", file_types=["image"]
                        )
                prompt_input = gr.Textbox(
                    label="Prompt",
                    info="Tell the model what you want it to do",
                    placeholder="A delicious looking pizza"
                )
                generate_button = gr.Button("Generate", variant="primary")
            with gr.Column(scale=1):
                output_image = gr.Image(label="Output", interactive=False, elem_id="output")
                use_image_button = gr.Button("♻️ Use this Image for Next Edit")
        gr.Markdown("## Thank you for being a PRO! 🤗")
    login_button = gr.LoginButton()
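    # gr.LoginButton enables "Sign in with Hugging Face" OAuth on Spaces. Gradio
    # auto-injects gr.OAuthToken / gr.OAuthProfile values into any event handler
    # whose signature annotates parameters with those types, which is how
    # unified_generator and control_access below receive credentials without
    # listing them as explicit inputs.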
    # --- Event Handlers ---
    def unified_generator(
        prompt: str,
        single_image: Optional[str],
        multi_images: Optional[List[str]],
        active_tab: str,
        oauth_token: Optional[gr.OAuthToken] = None,
    ) -> str:
        if not verify_pro_status(oauth_token):
            raise gr.Error("Access Denied. This service is for PRO users only.")
        if active_tab == "multiple" and multi_images:
            return run_multi_image_logic(prompt, multi_images)
        else:
            return run_single_image_logic(prompt, single_image)

    single_tab.select(lambda: "single", None, active_tab_state)
    multi_tab.select(lambda: "multiple", None, active_tab_state)

    generate_button.click(
        unified_generator,
        inputs=[prompt_input, image_input, gallery_input, active_tab_state],
        outputs=[output_image],
    )

    use_image_button.click(
        lambda img: img,
        inputs=[output_image],
        outputs=[image_input]
    )
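    # Note on active_tab_state: gr.Tabs does not expose the selected tab as an
    # input component, so the .select() handlers above mirror the selection into
    # a gr.State that unified_generator branches on.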
    # --- Access Control Logic ---
    def control_access(
        profile: Optional[gr.OAuthProfile] = None,
        oauth_token: Optional[gr.OAuthToken] = None
    ):
        if not profile:
            return gr.update(visible=False), gr.update(visible=False)
        if verify_pro_status(oauth_token):
            return gr.update(visible=True), gr.update(visible=False)
        else:
            message = (
                "## ✨ Exclusive Access for PRO Users\n\n"
                "Thank you for your interest! This feature is available exclusively for our Hugging Face **PRO** members.\n\n"
                "To unlock this and many other benefits, please consider upgrading your account.\n\n"
                "### [**Become a PRO Member Today!**](https://huggingface.co/pro)"
            )
            return gr.update(visible=False), gr.update(visible=True, value=message)

    demo.load(control_access, inputs=None, outputs=[main_interface, pro_message])
if __name__ == "__main__":
    demo.queue(max_size=None, default_concurrency_limit=None)
    demo.launch()