# Hugging Face Space: aiqtech/Nano-Banana-API (status: Running)
# File size: 21,273 Bytes

import gradio as gr
from google import genai 
from google.genai import types 
import os
from typing import Optional, List
from huggingface_hub import whoami
from PIL import Image
from io import BytesIO
import tempfile

# --- Google Gemini API Configuration ---
# Model identifier sent with every generation request made by this app.
GEMINI_MODEL_NAME = 'gemini-2.5-flash-image-preview'

# Credentials are read from the environment. GEMINI_API_KEY takes precedence;
# GOOGLE_API_KEY is only used when the former is unset or empty.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
API_KEY = GEMINI_API_KEY if GEMINI_API_KEY else GOOGLE_API_KEY

# Fail at import time instead of on the first request when no key is configured.
if not API_KEY:
    raise ValueError("Neither GEMINI_API_KEY nor GOOGLE_API_KEY environment variable is set.")

# Single shared SDK client used by all request handlers below.
client = genai.Client(api_key=API_KEY)

def verify_pro_status(token: Optional[gr.OAuthToken]) -> bool:
    """Tell whether the signed-in Hugging Face user is allowed to use the app.

    Args:
        token: OAuth token of the current session, or ``None`` when nobody is
            logged in.

    Returns:
        ``True`` when the account returned by ``whoami`` has a truthy
        ``isPro`` flag, or when any entry of its ``orgs`` list has a truthy
        ``isEnterprise`` flag. ``False`` when there is no token, when neither
        flag is set, or when the lookup fails for any reason (the failure is
        printed, not raised).
    """
    if not token:
        return False

    try:
        account = whoami(token=token.token)
        if not account.get("isPro", False):
            # Not PRO personally: fall back to enterprise-org membership.
            for organisation in account.get("orgs", []):
                if organisation.get("isEnterprise", False):
                    return True
            return False
        return True
    except Exception as e:
        # Best effort: any lookup problem is treated as "not entitled".
        print(f"Could not verify user's PRO/Enterprise status: {e}")
        return False

def _extract_image_data_from_response(response) -> Optional[bytes]:
    """Helper to extract image data from the model's response."""
    # Debug: Print response structure
    print(f"Response type: {type(response)}")
    
    # Try multiple ways to extract image data
    # Method 1: Direct image attribute
    if hasattr(response, 'image'):
        print("Found response.image")
        return response.image
    
    # Method 2: Images array
    if hasattr(response, 'images') and response.images:
        print(f"Found response.images with {len(response.images)} images")
        return response.images[0]
    
    # Method 3: Candidates with parts
    if hasattr(response, 'candidates') and response.candidates:
        print(f"Found {len(response.candidates)} candidates")
        for i, candidate in enumerate(response.candidates):
            print(f"Candidate {i}: {type(candidate)}")
            
            # Check for content.parts
            if hasattr(candidate, 'content'):
                print(f"  Has content: {type(candidate.content)}")
                if hasattr(candidate.content, 'parts') and candidate.content.parts:
                    print(f"  Has {len(candidate.content.parts)} parts")
                    for j, part in enumerate(candidate.content.parts):
                        print(f"    Part {j}: {type(part)}")
                        
                        # Check for inline_data
                        if hasattr(part, 'inline_data'):
                            print(f"      Has inline_data")
                            if hasattr(part.inline_data, 'data'):
                                print(f"      Found image data!")
                                return part.inline_data.data
                            if hasattr(part.inline_data, 'blob'):
                                print(f"      Found blob data!")
                                return part.inline_data.blob
                        
                        # Check for blob directly
                        if hasattr(part, 'blob'):
                            print(f"      Has blob")
                            return part.blob
                        
                        # Check for data directly
                        if hasattr(part, 'data'):
                            print(f"      Has data")
                            return part.data
    
    # Method 4: Text response (might need different API configuration)
    if hasattr(response, 'text'):
        print(f"Response has text but no image: {response.text[:200] if response.text else 'Empty'}")
    
    print("No image data found in response")
    return None

def run_single_image_logic(prompt: str, image_path: Optional[str] = None, progress=gr.Progress()) -> str:
    """Generate an image from text, or edit one input image, with Gemini.

    Args:
        prompt: User instruction. It is prefixed with "Generate an image: " for
            text-to-image and with "Edit this image: " when an input image is
            supplied.
        image_path: Path of the image to edit, or ``None``/empty for pure
            text-to-image generation.
        progress: Gradio progress callback used to report the current stage.

    Returns:
        Path of a temporary ``.png`` file holding the generated image. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If anything fails (bad input image, API error, response
            without image data). The original exception is chained as the
            cause.
    """
    input_image = None
    try:
        progress(0.2, desc="🎨 준비 중...")

        if image_path:
            # Image-to-image: the picture goes first, the instruction second.
            input_image = Image.open(image_path)
            contents = [input_image, f"Edit this image: {prompt}"]
        else:
            # Text-to-image.
            contents = [f"Generate an image: {prompt}"]

        progress(0.5, desc="✨ 생성 중...")

        # The google-genai SDK takes request options through `config=` as a
        # GenerateContentConfig; a `generation_config=` keyword is rejected
        # with TypeError before any request is sent.
        response = client.models.generate_content(
            model=GEMINI_MODEL_NAME,
            contents=contents,
            config=types.GenerateContentConfig(
                temperature=1.0,
                max_output_tokens=8192,
            ),
        )

        # Debug: print full response.
        print(f"Full response: {response}")

        progress(0.8, desc="🖼️ 마무리 중...")
        image_data = _extract_image_data_from_response(response)

        # NOTE: there is deliberately no fallback to
        # client.models.generate_images here. In the google-genai SDK that
        # method takes its options through `config=` (there is no `n=`
        # keyword) and returns `generated_images`, so such a call made with
        # `n=1` can only raise TypeError and hide the clearer error below.
        if not image_data:
            raise ValueError("No image data found in the model response. The API might not support image generation or the model name might be incorrect.")

        # Save the generated image to a temporary file to return its path.
        # Writing through the open handle avoids re-opening the file by name
        # while it is still open.
        pil_image = Image.open(BytesIO(image_data))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            pil_image.save(tmpfile, format="PNG")
            progress(1.0, desc="✅ 완료!")
            return tmpfile.name

    except Exception as e:
        print(f"Error details: {e}")
        print(f"Error type: {type(e)}")
        raise gr.Error(f"이미지 생성 실패: {e}") from e
    finally:
        # Release the file handle held by the input image, if one was opened.
        if input_image is not None:
            input_image.close()


def run_multi_image_logic(prompt: str, images: List[str], progress=gr.Progress()) -> str:
    """Combine or edit several input images according to a prompt.

    Args:
        prompt: User instruction; sent after the images, prefixed with
            "Combine/edit these images: ".
        images: Input images. Each entry is either a file path or a
            list/tuple whose first element is the file path.
        progress: Gradio progress callback used to report the current stage.

    Returns:
        Path of a temporary ``.png`` file holding the generated image. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If ``images`` is empty, or if anything fails while
            preparing the inputs, calling the model or saving the result. For
            the latter the original exception is chained as the cause.
    """
    if not images:
        raise gr.Error("'여러 이미지' 탭에서 최소 한 개의 이미지를 업로드해주세요.")

    opened_images = []
    try:
        progress(0.2, desc="🎨 이미지 준비 중...")
        for entry in images:
            # Entries may be bare paths or sequences with the path first.
            path = entry[0] if isinstance(entry, (list, tuple)) else entry
            opened_images.append(Image.open(path))
        contents = [*opened_images, f"Combine/edit these images: {prompt}"]

        progress(0.5, desc="✨ 생성 중...")

        # The google-genai SDK takes request options through `config=` as a
        # GenerateContentConfig; a `generation_config=` keyword is rejected
        # with TypeError before any request is sent.
        response = client.models.generate_content(
            model=GEMINI_MODEL_NAME,
            contents=contents,
            config=types.GenerateContentConfig(
                temperature=1.0,
                max_output_tokens=8192,
            ),
        )

        # Debug: print full response.
        print(f"Multi-image response: {response}")

        progress(0.8, desc="🖼️ 마무리 중...")
        image_data = _extract_image_data_from_response(response)

        if not image_data:
            raise ValueError("No image data found in the model response. The API might not support multi-image generation.")

        # Writing through the open handle avoids re-opening the temp file by
        # name while it is still open.
        pil_image = Image.open(BytesIO(image_data))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            pil_image.save(tmpfile, format="PNG")
            progress(1.0, desc="✅ 완료!")
            return tmpfile.name

    except Exception as e:
        print(f"Multi-image error details: {e}")
        raise gr.Error(f"이미지 생성 실패: {e}") from e
    finally:
        # Release the file handles held by the input images.
        for opened in opened_images:
            opened.close()


# --- Gradio App UI ---
# Custom stylesheet handed to gr.Blocks(css=...) below. It styles the header
# banner, card containers, tabs, the generate/use buttons, the prompt textbox,
# the output image (#output), the examples area and the PRO-only notice, with
# `.dark` variants for several of them and one mobile breakpoint at 768px.
css = '''
/* Header Styling */
.main-header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 2rem;
    border-radius: 1rem;
    margin-bottom: 2rem;
    box-shadow: 0 10px 30px rgba(0,0,0,0.1);
}

.header-title {
    font-size: 2.5rem !important;
    font-weight: bold;
    color: white;
    text-align: center;
    margin: 0 !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}

.header-subtitle {
    color: rgba(255,255,255,0.9);
    text-align: center;
    margin-top: 0.5rem !important;
    font-size: 1.1rem;
}

/* Card Styling */
.card {
    background: white;
    border-radius: 1rem;
    padding: 1.5rem;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    border: 1px solid rgba(0,0,0,0.05);
}

.dark .card {
    background: #1f2937;
    border: 1px solid #374151;
}

/* Tab Styling */
.tabs {
    border-radius: 0.5rem;
    overflow: hidden;
    margin-bottom: 1rem;
}

.tabitem {
    padding: 1rem !important;
}

button.selected {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
}

/* Button Styling */
.generate-btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-size: 1.1rem !important;
    font-weight: 600 !important;
    padding: 0.8rem 2rem !important;
    border-radius: 0.5rem !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    width: 100% !important;
    margin-top: 1rem !important;
}

.generate-btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 10px 20px rgba(102, 126, 234, 0.4) !important;
}

.use-btn {
    background: linear-gradient(135deg, #10b981 0%, #059669 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    padding: 0.6rem 1.5rem !important;
    border-radius: 0.5rem !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    width: 100% !important;
}

.use-btn:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 5px 15px rgba(16, 185, 129, 0.4) !important;
}

/* Input Styling */
.prompt-input textarea {
    border-radius: 0.5rem !important;
    border: 2px solid #e5e7eb !important;
    padding: 0.8rem !important;
    font-size: 1rem !important;
    transition: border-color 0.3s ease !important;
}

.prompt-input textarea:focus {
    border-color: #667eea !important;
    outline: none !important;
}

.dark .prompt-input textarea {
    border-color: #374151 !important;
    background: #1f2937 !important;
}

/* Image Output Styling */
#output {
    border-radius: 0.5rem !important;
    overflow: hidden !important;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
}

/* Progress Bar Styling */
.progress-bar {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
}

/* Examples Styling */
.examples {
    background: #f9fafb;
    border-radius: 0.5rem;
    padding: 1rem;
    margin-top: 1rem;
}

.dark .examples {
    background: #1f2937;
}

/* Pro Message Styling */
.pro-message {
    background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
    border-radius: 1rem;
    padding: 2rem;
    text-align: center;
    border: 2px solid #f59e0b;
}

.dark .pro-message {
    background: linear-gradient(135deg, #7c2d12 0%, #92400e 100%);
    border-color: #f59e0b;
}

/* Emoji Animations */
@keyframes bounce {
    0%, 100% { transform: translateY(0); }
    50% { transform: translateY(-10px); }
}

.emoji-icon {
    display: inline-block;
    animation: bounce 2s infinite;
}

/* Responsive Design */
@media (max-width: 768px) {
    .header-title {
        font-size: 2rem !important;
    }
    
    .main-container {
        padding: 1rem !important;
    }
}
'''

# Build the Blocks app. Components are created in display order: header,
# PRO notice, the (initially hidden) PRO message and main interface, then the
# login button.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    # Header banner
    gr.HTML('''
    <div class="main-header">
        <h1 class="header-title">
            🍌 Real Nano Banana
        </h1>
        <p class="header-subtitle">
            Google Gemini 2.5 Flash Image Preview로 구동되는 AI 이미지 생성기
        </p>
    </div>
    ''')

    # Always-visible notice that the Space is for PRO users, with a link to subscribe
    gr.HTML('''
    <div style="background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); 
                border-radius: 0.5rem; padding: 1rem; margin-bottom: 1.5rem; 
                border-left: 4px solid #f59e0b;">
        <p style="margin: 0; color: #92400e; font-weight: 600;">
            🌟 이 스페이스는 Hugging Face PRO 사용자 전용입니다. 
            <a href="https://huggingface.co/pro" target="_blank" 
               style="color: #dc2626; text-decoration: underline;">
               PRO 구독하기
            </a>
        </p>
    </div>
    ''')

    # Both containers start hidden; their visibility is set by the function
    # registered with demo.load further down.
    pro_message = gr.Markdown(visible=False)
    main_interface = gr.Column(visible=False, elem_classes="main-container")

    with main_interface:
        with gr.Row():
            # Left column: inputs (mode tabs, prompt, generate button, examples)
            with gr.Column(scale=1):
                # NOTE(review): the opening and closing <div> are emitted as
                # separate gr.HTML components, so this tag probably does not
                # wrap the components created in between — confirm in the
                # rendered page.
                gr.HTML('<div class="card">')

                # Mode Selection
                gr.HTML('<h3 style="margin-top: 0;">📸 모드 선택</h3>')
                # Holds the id of the tab currently selected ("single" or "multiple")
                active_tab_state = gr.State(value="single")

                with gr.Tabs(elem_classes="tabs") as tabs:
                    with gr.TabItem("🖼️ 단일 이미지", id="single") as single_tab:
                        # type="filepath": handlers receive the image as a path string
                        image_input = gr.Image(
                            type="filepath",
                            label="입력 이미지",
                            elem_classes="image-input"
                        )
                        gr.HTML('''
                        <p style="text-align: center; color: #6b7280; font-size: 0.9rem; margin-top: 0.5rem;">
                            💡 텍스트→이미지 생성은 비워두세요
                        </p>
                        ''')

                    with gr.TabItem("🎨 다중 이미지", id="multiple") as multi_tab:
                        gallery_input = gr.Gallery(
                            label="입력 이미지들", 
                            file_types=["image"],
                            elem_classes="gallery-input"
                        )
                        gr.HTML('''
                        <p style="text-align: center; color: #6b7280; font-size: 0.9rem; margin-top: 0.5rem;">
                            💡 여러 이미지를 드래그 앤 드롭하세요
                        </p>
                        ''')

                # Prompt Input
                gr.HTML('<h3>✍️ 프롬프트</h3>')
                prompt_input = gr.Textbox(
                    label="",
                    info="AI에게 원하는 것을 설명하세요",
                    placeholder="예: 맛있어 보이는 피자, 우주를 배경으로 한 고양이, 미래적인 도시 풍경...",
                    lines=3,
                    elem_classes="prompt-input"
                )

                # Generate Button
                generate_button = gr.Button(
                    "🚀 생성하기", 
                    variant="primary",
                    elem_classes="generate-btn"
                )

                # Example prompts; selecting one fills prompt_input
                with gr.Accordion("💡 예제 프롬프트", open=False):
                    gr.Examples(
                        examples=[
                            ["치즈가 늘어나는 맛있어 보이는 피자"],
                            ["우주복을 입은 고양이가 달 표면을 걷고 있는 모습"],
                            ["네온 불빛이 빛나는 사이버펑크 도시의 야경"],
                            ["봄날 벚꽃이 만개한 일본 정원"],
                            ["판타지 세계의 마법사 탑"],
                        ],
                        inputs=prompt_input
                    )

                gr.HTML('</div>')

            # Right column: generated result, reuse button and usage tips
            with gr.Column(scale=1):
                gr.HTML('<div class="card">')
                gr.HTML('<h3 style="margin-top: 0;">🎨 생성 결과</h3>')

                output_image = gr.Image(
                    label="", 
                    interactive=False, 
                    elem_id="output"
                )

                # Hidden until a generation succeeds (made visible by the generate handler)
                use_image_button = gr.Button(
                    "♻️ 이 이미지를 다음 편집에 사용", 
                    elem_classes="use-btn",
                    visible=False
                )

                # Tips
                gr.HTML('''
                <div style="background: #f0f9ff; border-radius: 0.5rem; padding: 1rem; margin-top: 1rem;">
                    <h4 style="margin-top: 0; color: #0369a1;">💡 팁</h4>
                    <ul style="margin: 0; padding-left: 1.5rem; color: #0c4a6e;">
                        <li>구체적이고 상세한 프롬프트를 사용하세요</li>
                        <li>생성된 이미지를 재사용하여 반복적으로 개선할 수 있습니다</li>
                        <li>다중 이미지 모드로 여러 참조 이미지를 결합할 수 있습니다</li>
                    </ul>
                </div>
                ''')

                gr.HTML('</div>')

        # Footer (inside main_interface, so it is hidden together with it)
        gr.HTML('''
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; 
                    border-top: 1px solid #e5e7eb;">
            <p style="color: #6b7280;">
                Made with 💜 by Hugging Face PRO | Powered by Google Gemini 2.5 Flash
            </p>
        </div>
        ''')

    # Created outside main_interface, so it is not affected by that column's visibility
    login_button = gr.LoginButton()
    
    # --- Event Handlers ---
    def unified_generator(
        prompt: str,
        single_image: Optional[str],
        multi_images: Optional[List[str]],
        active_tab: str,
        oauth_token: Optional[gr.OAuthToken] = None,
    ):
        """Handle a click on the generate button.

        Rejects callers that fail ``verify_pro_status`` and empty prompts,
        then dispatches to the multi-image path when the "multiple" tab is
        active and the gallery is non-empty, and to the single-image path
        (which also covers text-to-image) otherwise.

        Returns:
            A pair: the path of the generated image, and an update that makes
            the "use this image" button visible.

        Raises:
            gr.Error: When access is denied or the prompt is empty.
        """
        is_authorized = verify_pro_status(oauth_token)
        if not is_authorized:
            raise gr.Error("액세스 거부: 이 서비스는 PRO 사용자 전용입니다.")
        if not prompt:
            raise gr.Error("프롬프트를 입력해주세요.")

        use_gallery = active_tab == "multiple" and multi_images
        output_path = (
            run_multi_image_logic(prompt, multi_images)
            if use_gallery
            else run_single_image_logic(prompt, single_image)
        )
        reveal_reuse_button = gr.update(visible=True)
        return output_path, reveal_reuse_button

    # Record which tab is selected so the generate handler can pick its inputs.
    single_tab.select(fn=lambda: "single", inputs=None, outputs=active_tab_state)
    multi_tab.select(fn=lambda: "multiple", inputs=None, outputs=active_tab_state)

    # Generate: produce the image and reveal the reuse button.
    generate_button.click(
        fn=unified_generator,
        inputs=[prompt_input, image_input, gallery_input, active_tab_state],
        outputs=[output_image, use_image_button],
    )

    # Reuse: copy the output image into the single-image input and hide the
    # button again.
    use_image_button.click(
        fn=lambda img: (img, gr.update(visible=False)),
        inputs=[output_image],
        outputs=[image_input, use_image_button],
    )

    # --- Access Control Logic ---
    def control_access(
        profile: Optional[gr.OAuthProfile] = None,
        oauth_token: Optional[gr.OAuthToken] = None
    ):
        """Decide what to show when the page loads.

        Returns:
            A pair of updates for ``(main_interface, pro_message)``:
            both hidden when nobody is logged in; the main interface shown
            when ``verify_pro_status`` accepts the token; otherwise the main
            interface hidden and the PRO upsell message shown.
        """
        # Not logged in: show neither the app nor the upsell.
        if not profile:
            return gr.update(visible=False), gr.update(visible=False)

        # Entitled user: reveal the app, keep the upsell hidden.
        if verify_pro_status(oauth_token):
            show_main = gr.update(visible=True)
            hide_notice = gr.update(visible=False)
            return show_main, hide_notice

        # Logged in but not entitled: show the upsell instead of the app.
        message = '''
            <div class="pro-message">
                <h2>✨ PRO 사용자 전용 기능</h2>
                <p style="font-size: 1.1rem; margin: 1rem 0;">
                    이 강력한 AI 이미지 생성 도구는 Hugging Face <strong>PRO</strong> 멤버 전용입니다.
                </p>
                <p style="margin: 1rem 0;">
                    PRO 구독으로 다음을 누리세요:
                </p>
                <ul style="text-align: left; display: inline-block; margin: 1rem 0;">
                    <li>🚀 Google Gemini 2.5 Flash 무제한 액세스</li>
                    <li>⚡ 빠른 이미지 생성</li>
                    <li>🎨 고품질 결과물</li>
                    <li>🔧 다중 이미지 편집 기능</li>
                </ul>
                <a href="https://huggingface.co/pro" target="_blank" 
                   style="display: inline-block; margin-top: 1rem; padding: 1rem 2rem;
                          background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                          color: white; text-decoration: none; border-radius: 0.5rem;
                          font-weight: bold; font-size: 1.1rem;">
                    🌟 지금 PRO 멤버 되기
                </a>
            </div>
            '''
        hide_main = gr.update(visible=False)
        show_notice = gr.update(visible=True, value=message)
        return hide_main, show_notice

    # Evaluate access once on every page load.
    demo.load(fn=control_access, inputs=None, outputs=[main_interface, pro_message])

if __name__ == "__main__":
    # Enable the request queue with no size cap and no default concurrency
    # limit, then start the server.
    demo.queue(default_concurrency_limit=None, max_size=None).launch()