# Source: Hugging Face Spaces -- Mohit5899 / Image_editing (status: Running)
# File size: 5,773 Bytes

import base64
import mimetypes
import os
import shutil
import tempfile
import time

import google.generativeai as genai
import gradio as gr
from PIL import Image as PILImage

def _safe_file_stem(file_name):
    """Reduce a user-supplied output name to a single safe path component.

    Directory parts (with either ``/`` or ``\\`` separators) and NUL bytes are
    dropped so the result cannot escape the directory it is later joined to.
    Falls back to a timestamp-based name when nothing usable remains.
    """
    raw = str(file_name) if file_name else ""
    # Normalise backslashes first so "..\\x" is also split on POSIX systems.
    stem = os.path.basename(raw.replace("\\", "/")).replace("\x00", "").strip()
    if stem in ("", ".", ".."):
        return f"gemini_image_{int(time.time())}"
    return stem


def generate_image(api_key, prompt, input_image, file_name):
    """Request an image from the Gemini model and save it to a temp file.

    Args:
        api_key: Gemini API key; surrounding whitespace is ignored.
        prompt: Optional text prompt; surrounding whitespace is ignored.
        input_image: Optional input image, either a file path (``str``) or an
            array accepted by ``PIL.Image.fromarray``.
        file_name: Optional output file name without extension. Only its final
            path component is used; a timestamp-based name is used when empty.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the saved image file, or
        ``None`` when validation fails, no image part is returned, or an
        exception occurs (``message`` then carries the explanation).
    """
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if isinstance(prompt, str):
        prompt = prompt.strip()

    # Validate inputs
    if not api_key:
        return None, "Please enter your Gemini API key"

    if not prompt and input_image is None:
        return None, "Please enter a prompt and/or upload an input image"

    # The name comes from a web form, so never let it carry directory parts.
    file_stem = _safe_file_stem(file_name)

    # Created lazily, only once there is image data to write, so that failed
    # or image-less requests do not leave empty temp directories behind.
    temp_dir = None

    try:
        # Set up the client
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp-image-generation")

        # NOTE(review): safety_settings is nested inside generation_config
        # here; the google-generativeai docs show it as a separate
        # generate_content() argument -- confirm against the installed SDK
        # version before relying on these thresholds.
        generation_config = {
            "response_modalities": ["image", "text"],
            "safety_settings": {
                "HARM_CATEGORY_HARASSMENT": "BLOCK_NONE",
                "HARM_CATEGORY_HATE_SPEECH": "BLOCK_NONE",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT": "BLOCK_NONE",
                "HARM_CATEGORY_DANGEROUS_CONTENT": "BLOCK_NONE",
                "HARM_CATEGORY_CIVIC_INTEGRITY": "BLOCK_NONE",
            }
        }

        # Prepare content based on inputs: text first, then the image.
        content = []
        if prompt:
            content.append(prompt)
        if input_image is not None:
            if isinstance(input_image, str):
                # A string is treated as a path to an image file.
                content.append(PILImage.open(input_image))
            else:
                # Otherwise assume an array such as Gradio's numpy image.
                content.append(PILImage.fromarray(input_image))

        # Generate the content
        response = model.generate_content(
            content,
            generation_config=generation_config,
            stream=True
        )

        generated_image_path = None
        text_parts = []

        for chunk in response:
            candidates = getattr(chunk, "candidates", None)
            if not candidates:
                continue
            candidate_content = getattr(candidates[0], "content", None)
            if not candidate_content:
                continue

            for part in candidate_content.parts:
                inline_data = getattr(part, "inline_data", None)
                if inline_data:
                    file_extension = mimetypes.guess_extension(inline_data.mime_type) or '.jpg'
                    if temp_dir is None:
                        temp_dir = tempfile.mkdtemp()
                    # If several image parts share a name, the last one wins.
                    generated_image_path = os.path.join(temp_dir, f"{file_stem}{file_extension}")

                    with open(generated_image_path, "wb") as f:
                        f.write(inline_data.data)

                    text_parts.append(f"Image of type {inline_data.mime_type} generated successfully.")
                elif getattr(part, "text", None):
                    text_parts.append(part.text)

        if generated_image_path:
            return generated_image_path, "".join(text_parts)
        return None, "No image was generated. Try a different prompt or input image."

    except Exception as e:
        # Nothing is returned to the caller on failure, so drop any partially
        # written output instead of leaking the directory.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"Error: {str(e)}"

def create_ui():
    """Assemble the Gradio Blocks interface for the image generator.

    The layout is a single row with two columns: the left one collects the
    API key, prompt, optional input image and optional output file name, and
    the right one shows the generated image and its accompanying text. The
    button click is wired to ``generate_image``.

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    # Help text rendered below the two columns.
    usage_notes = """
        ## How to use
        1. Enter your Gemini API key (get one from https://ai.google.dev/)
        2. Write a detailed prompt describing the image you want to generate
        3. (Optional) Upload an input image to influence the generation
        4. (Optional) Provide a file name for your generated image
        5. Click "Generate Image" and wait for the result
        
        ## Notes
        - The model used is `gemini-2.0-flash-exp-image-generation`
        - You can use text prompts, input images, or both together
        - All safety filters are set to "BLOCK_NONE" - use responsibly
        - Image generation may take a few seconds to complete
        """

    with gr.Blocks(title="Gemini Image Generator") as app:
        gr.Markdown("# Gemini Image Generator")
        gr.Markdown("Generate images using Google's Gemini 2.0 Flash Image Generation model")

        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column():
                key_box = gr.Textbox(
                    type="password",
                    label="Gemini API Key",
                    placeholder="Enter your Gemini API key here",
                )
                prompt_box = gr.Textbox(
                    lines=3,
                    label="Prompt",
                    placeholder="Describe the image you want to generate",
                )
                source_image = gr.Image(type="numpy", label="Input Image (Optional)")
                name_box = gr.Textbox(
                    label="Output File Name (optional)",
                    placeholder="Enter a file name (without extension)",
                )
                run_button = gr.Button("Generate Image", variant="primary")

            # Right column: what comes back from generate_image.
            with gr.Column():
                result_image = gr.Image(label="Generated Image", type="filepath")
                result_text = gr.Textbox(label="Generation Info", lines=2)

        # Input order must match generate_image's positional parameters.
        click_inputs = [key_box, prompt_box, source_image, name_box]
        click_outputs = [result_image, result_text]
        run_button.click(fn=generate_image, inputs=click_inputs, outputs=click_outputs)

        gr.Markdown(usage_notes)

    return app

if __name__ == "__main__":
    # Only build and launch the UI when this file is run as a script, so that
    # importing the module does not start the app as a side effect.
    demo = create_ui()
    demo.launch()