Spaces:

iimran
/

Gemini-picture-editor

Sleeping

App Files Files Community

iimran committed on Mar 22

Commit

cc3f1c9

verified ·

1 Parent(s): 95d2aad

Create app.py

Browse files

Files changed (1) hide show

app.py +226 -0

app.py ADDED Viewed

	@@ -0,0 +1,226 @@

+import json
+import os
+import time
+import uuid
+import tempfile
+from PIL import Image
+import gradio as gr
+import base64
+from google import genai
+from google.genai import types
+class ImageEditor:
+    def __init__(self):
+        self.model_name = "gemini-2.0-flash-exp"
+    def save_file(self, file_path, data):
+        """Save binary data to a file"""
+        with open(file_path, "wb") as f:
+            f.write(data)
+    def get_client(self, api_key):
+        """Initialize and return a Gemini client"""
+        key = api_key.strip() if api_key and api_key.strip() != "" else os.environ.get("GEMINI_API_KEY")
+        return genai.Client(api_key=key)
+    def upload_file(self, client, file_path):
+        """Upload a file to Gemini"""
+        return client.files.upload(file=file_path)
+    def create_content(self, file_uri, file_mime_type, prompt_text):
+        """Create content for the Gemini API request"""
+        return [
+            types.Content(
+                role="user",
+                parts=[
+                    types.Part.from_uri(
+                        file_uri=file_uri,
+                        mime_type=file_mime_type,
+                    ),
+                    types.Part.from_text(text=prompt_text),
+                ],
+            ),
+        ]
+    def create_config(self):
+        """Create configuration for the Gemini API request"""
+        return types.GenerateContentConfig(
+            temperature=1,
+            top_p=0.95,
+            top_k=40,
+            max_output_tokens=8192,
+            response_modalities=["image", "text"],
+            response_mime_type="text/plain",
+        )
+    def process_response(self, response_stream, temp_path):
+        """Process the response stream from Gemini"""
+        text_response = ""
+        image_path = None
+        for chunk in response_stream:
+            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
+                continue
+            candidate = chunk.candidates[0].content.parts[0]
+            if candidate.inline_data:
+                self.save_file(temp_path, candidate.inline_data.data)
+                print(f"Image saved to: {temp_path}")
+                image_path = temp_path
+                break
+            else:
+                text_response += chunk.text + "\n"
+        return image_path, text_response
+    def generate_image(self, prompt_text, file_path, api_key):
+        """Generate an image based on prompt and input image"""
+        client = self.get_client(api_key)
+        # Upload the file
+        uploaded_file = self.upload_file(client, file_path)
+        # Create content and config
+        contents = self.create_content(uploaded_file.uri, uploaded_file.mime_type, prompt_text)
+        config = self.create_config()
+        # Process the response
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            temp_path = tmp.name
+            response_stream = client.models.generate_content_stream(
+                model=self.model_name,
+                contents=contents,
+                config=config,
+            )
+            image_path, text_response = self.process_response(response_stream, temp_path)
+        # Clean up
+        del uploaded_file
+        return image_path, text_response
+    def process_image_and_prompt(self, input_image, prompt, api_key):
+        """Process the input image and prompt"""
+        try:
+            # Save the input image to a temporary file
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+                image_path = tmp.name
+                input_image.save(image_path)
+            # Generate the image
+            result_path, text_response = self.generate_image(prompt, image_path, api_key)
+            if result_path:
+                # Load and convert the image if needed
+                result_img = Image.open(result_path)
+                if result_img.mode == "RGBA":
+                    result_img = result_img.convert("RGB")
+                return [result_img], ""
+            else:
+                # Return no image and the text response
+                return None, text_response
+        except Exception as e:
+            raise gr.Error(f"Error: {e}", duration=5)
+def create_interface():
+    """Create the Gradio interface"""
+    image_editor = ImageEditor()
+    with gr.Blocks(css="style.css") as app:
+        # Header
+        gr.HTML(
+        """
+        <div class="header-container">
+          <div>
+              <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
+          </div>
+          <div>
+              <h1>My Image Editing App</h1>
+              <p>Powered by Gradio⚡️ and Gemini |
+              <a href="https://aistudio.google.com/apikey">Get an API Key</a></p>
+          </div>
+        </div>
+        """
+        )
+        # API Configuration
+        with gr.Accordion("⚠️ API Configuration ⚠️", open=False):
+            gr.Markdown("""
+        - **Note:** You need to provide a Gemini API key for image generation
+        - Sometimes the model returns text instead of an image - try adjusting your prompt
+        """)
+        # Usage Instructions
+        with gr.Accordion("📌 Usage Instructions", open=False):
+            gr.Markdown("""
+        ### How to Use
+        - Upload an image (PNG format recommended)
+        - Enter a prompt describing the edit you want
+        - Click Generate to create your output
+        - If text is returned instead of an image, it will appear in the text output area
+        - ❌ **Do not use NSFW images!**
+        """)
+        # Main Content
+        with gr.Row():
+            # Input Column
+            with gr.Column():
+                image_input = gr.Image(
+                    type="pil",
+                    label="Upload Image",
+                    image_mode="RGBA"
+                )
+                api_key_input = gr.Textbox(
+                    lines=1,
+                    placeholder="Enter Gemini API Key",
+                    label="Gemini API Key",
+                    type="password"
+                )
+                prompt_input = gr.Textbox(
+                    lines=2,
+                    placeholder="Describe the edit you want...",
+                    label="Edit Prompt"
+                )
+                generate_btn = gr.Button("Generate Edit")
+            # Output Column
+            with gr.Column():
+                output_gallery = gr.Gallery(label="Edited Image")
+                output_text = gr.Textbox(
+                    label="Text Output",
+                    placeholder="Text response will appear here if no image is generated."
+                )
+        # Connect the interface
+        generate_btn.click(
+            fn=image_editor.process_image_and_prompt,
+            inputs=[image_input, prompt_input, api_key_input],
+            outputs=[output_gallery, output_text],
+        )
+        # Examples
+        gr.Markdown("## Example Prompts")
+        examples = [
+            ["data/1.webp", 'change text to "MY TEXT"', ""],
+            ["data/2.webp", "remove the spoon from the image", ""],
+            ["data/3.webp", 'change text to "Custom Text"', ""],
+            ["data/1.jpg", "add cartoon style to the face", ""],
+        ]
+        gr.Examples(
+            examples=examples,
+            inputs=[image_input, prompt_input]
+        )
+    return app
+# Create and launch the app
+if __name__ == "__main__":
+    app = create_interface()
+    app.queue(max_size=50).launch()