"""Gradio front end that generates images with a Gemini model."""

import base64
import mimetypes
import os
import tempfile
import time

import google.generativeai as genai
import gradio as gr
from PIL import Image as PILImage

MODEL_NAME = "gemini-2.0-flash-exp-image-generation"

# Per-category blocking thresholds sent with every request; all of them are
# switched off, which the UI notes also tell the user.
# NOTE(review): confirm the installed SDK version recognises
# HARM_CATEGORY_CIVIC_INTEGRITY as a string key.
SAFETY_SETTINGS = {
    "HARM_CATEGORY_HARASSMENT": "BLOCK_NONE",
    "HARM_CATEGORY_HATE_SPEECH": "BLOCK_NONE",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT": "BLOCK_NONE",
    "HARM_CATEGORY_DANGEROUS_CONTENT": "BLOCK_NONE",
    "HARM_CATEGORY_CIVIC_INTEGRITY": "BLOCK_NONE",
}


def _safe_file_stem(file_name):
    """Return a bare file name (no directory parts) for the output image.

    The name comes straight from a UI text box, so anything that could move
    the output outside the temporary directory (path separators, ``..``, an
    absolute path) is dropped by keeping only the final path component.
    Falls back to a timestamped default when nothing usable remains.
    """
    raw = str(file_name or "").strip()
    # Normalise Windows separators so basename() also strips them on POSIX.
    stem = os.path.basename(raw.replace("\\", "/"))
    if stem in ("", ".", ".."):
        return f"gemini_image_{int(time.time())}"
    return stem


def _build_contents(prompt, input_image):
    """Assemble the request parts: the prompt first, then the image, if given."""
    contents = []
    if prompt:
        contents.append(prompt)
    if input_image is not None:
        if isinstance(input_image, str):
            # A string is treated as a path to an image file.
            contents.append(PILImage.open(input_image))
        else:
            # Otherwise it is treated as an array (the UI passes type="numpy").
            contents.append(PILImage.fromarray(input_image))
    return contents


def generate_image(api_key, prompt, input_image, file_name):
    """Request an image from the Gemini model and save it to a temp file.

    Args:
        api_key: Gemini API key; required.
        prompt: Text prompt; may be empty when ``input_image`` is given.
        input_image: Optional input image, either a file path (``str``) or
            an array accepted by ``PIL.Image.fromarray``.
        file_name: Optional output file name without extension. Directory
            components are stripped; a timestamped name is used when empty.

    Returns:
        A ``(image_path, message)`` tuple. ``image_path`` is ``None`` when
        validation fails, no image came back, or an exception was raised;
        ``message`` then holds the explanation. On success ``message`` is the
        concatenation of the status line(s) and any text the model returned.
    """
    if not api_key:
        return None, "Please enter your Gemini API key"
    if not prompt and input_image is None:
        return None, "Please enter a prompt and/or upload an input image"

    file_stem = _safe_file_stem(file_name)

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(MODEL_NAME)

        # Safety settings are a request-level argument of generate_content,
        # not a generation_config field, so they are passed separately.
        # NOTE(review): confirm the installed SDK accepts
        # "response_modalities" in generation_config.
        response = model.generate_content(
            _build_contents(prompt, input_image),
            generation_config={"response_modalities": ["image", "text"]},
            safety_settings=SAFETY_SETTINGS,
            stream=True,
        )

        temp_dir = None
        image_path = None
        messages = []

        for chunk in response:
            candidates = getattr(chunk, "candidates", None)
            if not candidates:
                continue
            candidate_content = getattr(candidates[0], "content", None)
            if not candidate_content:
                continue

            for part in candidate_content.parts:
                inline_data = getattr(part, "inline_data", None)
                if inline_data:
                    # Create the directory only once an image actually
                    # arrives, so failed requests leave nothing behind. It is
                    # not removed afterwards because the returned path is
                    # handed to the caller.
                    if temp_dir is None:
                        temp_dir = tempfile.mkdtemp()
                    extension = (
                        mimetypes.guess_extension(inline_data.mime_type)
                        or ".jpg"
                    )
                    # A later image part reuses the same name and therefore
                    # overwrites an earlier one.
                    image_path = os.path.join(
                        temp_dir, f"{file_stem}{extension}"
                    )
                    with open(image_path, "wb") as out_file:
                        out_file.write(inline_data.data)
                    messages.append(
                        f"Image of type {inline_data.mime_type} "
                        "generated successfully."
                    )
                elif getattr(part, "text", None):
                    messages.append(part.text)

        if image_path:
            return image_path, "".join(messages)
        return (
            None,
            "No image was generated. Try a different prompt or input image.",
        )
    except Exception as e:
        # UI boundary: report the failure in the info box instead of raising.
        return None, f"Error: {str(e)}"


def create_ui():
    """Build and return the Gradio Blocks app wired to ``generate_image``."""
    usage_notes = "\n".join(
        (
            "## How to use",
            "1. Enter your Gemini API key (get one from https://ai.google.dev/)",
            "2. Write a detailed prompt describing the image you want to generate",
            "3. (Optional) Upload an input image to influence the generation",
            "4. (Optional) Provide a file name for your generated image",
            '5. Click "Generate Image" and wait for the result',
            "",
            "## Notes",
            f"- The model used is `{MODEL_NAME}`",
            "- You can use text prompts, input images, or both together",
            '- All safety filters are set to "BLOCK_NONE" - use responsibly',
            "- Image generation may take a few seconds to complete",
        )
    )

    with gr.Blocks(title="Gemini Image Generator") as demo:
        gr.Markdown("# Gemini Image Generator")
        gr.Markdown(
            "Generate images using Google's Gemini 2.0 Flash Image Generation model"
        )

        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column():
                api_key = gr.Textbox(
                    label="Gemini API Key",
                    placeholder="Enter your Gemini API key here",
                    type="password",
                )
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe the image you want to generate",
                    lines=3,
                )
                input_image = gr.Image(
                    label="Input Image (Optional)",
                    type="numpy",
                )
                file_name = gr.Textbox(
                    label="Output File Name (optional)",
                    placeholder="Enter a file name (without extension)",
                )
                generate_btn = gr.Button("Generate Image", variant="primary")

            # Right column: the generated image and the status text.
            with gr.Column():
                output_image = gr.Image(label="Generated Image", type="filepath")
                output_text = gr.Textbox(label="Generation Info", lines=2)

        generate_btn.click(
            fn=generate_image,
            inputs=[api_key, prompt, input_image, file_name],
            outputs=[output_image, output_text],
        )

        gr.Markdown(usage_notes)

    return demo


if __name__ == "__main__":
    demo = create_ui()
    demo.launch()