iimran committed on
Commit
cc3f1c9
·
verified ·
1 Parent(s): 95d2aad

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -0
app.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import time
4
+ import uuid
5
+ import tempfile
6
+ from PIL import Image
7
+ import gradio as gr
8
+ import base64
9
+
10
+ from google import genai
11
+ from google.genai import types
12
+
13
class ImageEditor:
    """Edit an image with a Gemini model, driven by a text prompt.

    The public entry point is :meth:`process_image_and_prompt`, which is
    wired to the Gradio "Generate" button. The remaining methods are small
    steps of that pipeline: build a client, upload the input, call the model
    and extract an image (or fallback text) from the streamed response.
    """

    def __init__(self):
        # Model identifier passed to ``generate_content_stream``.
        self.model_name = "gemini-2.0-flash-exp"

    @staticmethod
    def _remove_file(path):
        """Delete ``path`` if it exists; ignore ``None`` and missing files."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup of a temp file: nothing useful to do on failure.
            pass

    def save_file(self, file_path, data):
        """Write the binary ``data`` to ``file_path``, overwriting it."""
        with open(file_path, "wb") as f:
            f.write(data)

    def get_client(self, api_key):
        """Return a Gemini client.

        Uses ``api_key`` when it is non-blank, otherwise falls back to the
        ``GEMINI_API_KEY`` environment variable (which may be unset, in which
        case ``None`` is handed to the client constructor).
        """
        key = (api_key or "").strip() or os.environ.get("GEMINI_API_KEY")
        return genai.Client(api_key=key)

    def upload_file(self, client, file_path):
        """Upload the file at ``file_path`` through ``client`` and return the upload handle."""
        return client.files.upload(file=file_path)

    def create_content(self, file_uri, file_mime_type, prompt_text):
        """Build the single user turn: the uploaded image followed by the prompt."""
        return [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=file_uri,
                        mime_type=file_mime_type,
                    ),
                    types.Part.from_text(text=prompt_text),
                ],
            ),
        ]

    def create_config(self):
        """Build the generation config requesting both image and text output."""
        return types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],
            response_mime_type="text/plain",
        )

    def process_response(self, response_stream, temp_path):
        """Extract the first image, or the accumulated text, from a response stream.

        Args:
            response_stream: Iterable of response chunks. Each chunk is read
                via ``chunk.candidates[0].content.parts``.
            temp_path: Where to write the image bytes if an image part arrives.

        Returns:
            ``(image_path, text_response)``. ``image_path`` is ``temp_path``
            when an image was written, else ``None``. ``text_response`` holds
            every text part seen before the image (or all of them when there
            is no image), each followed by a newline.
        """
        collected_text = []

        for chunk in response_stream:
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue

            # Look at every part: an image may follow a text part in one chunk.
            for part in chunk.candidates[0].content.parts:
                inline_data = getattr(part, "inline_data", None)
                if inline_data and inline_data.data:
                    self.save_file(temp_path, inline_data.data)
                    print(f"Image saved to: {temp_path}")
                    # The first image wins; the rest of the stream is not consumed.
                    return temp_path, "".join(collected_text)

                part_text = getattr(part, "text", None)
                if part_text:  # skip parts with no text instead of failing on None
                    collected_text.append(part_text + "\n")

        return None, "".join(collected_text)

    def generate_image(self, prompt_text, file_path, api_key):
        """Upload ``file_path``, ask the model to apply ``prompt_text``, return the result.

        Returns:
            ``(image_path, text_response)`` as produced by
            :meth:`process_response`. When ``image_path`` is not ``None`` the
            caller owns that temp file and is responsible for deleting it.
        """
        client = self.get_client(api_key)

        # Upload the file
        uploaded_file = self.upload_file(client, file_path)

        temp_path = None
        image_path = None
        try:
            # Create content and config
            contents = self.create_content(uploaded_file.uri, uploaded_file.mime_type, prompt_text)
            config = self.create_config()

            # Reserve an output path, closing the handle before anything
            # else writes to it.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                temp_path = tmp.name

            response_stream = client.models.generate_content_stream(
                model=self.model_name,
                contents=contents,
                config=config,
            )
            image_path, text_response = self.process_response(response_stream, temp_path)
            return image_path, text_response
        finally:
            # No image (or an error): the reserved temp file is garbage.
            if image_path is None:
                self._remove_file(temp_path)
            # Clean up the remote upload. Best-effort: a failed delete must
            # not mask the real result or the real error.
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_error:
                print(f"Could not delete uploaded file: {cleanup_error}")

    def process_image_and_prompt(self, input_image, prompt, api_key):
        """Gradio callback: edit ``input_image`` according to ``prompt``.

        Args:
            input_image: The uploaded image; must provide ``save(path)``
                (the UI supplies a PIL image), or ``None`` if nothing was uploaded.
            prompt: Text describing the desired edit.
            api_key: Gemini API key typed by the user (may be blank).

        Returns:
            ``([image], "")`` when the model produced an image, otherwise
            ``(None, text_response)``.

        Raises:
            gr.Error: If no image was uploaded or any step of the pipeline fails.
        """
        if input_image is None:
            raise gr.Error("Error: please upload an image first.", duration=5)

        input_path = None
        result_path = None
        try:
            # Save the input image to a temporary file
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                input_path = tmp.name
            input_image.save(input_path)

            # Generate the image
            result_path, text_response = self.generate_image(prompt, input_path, api_key)

            if not result_path:
                # Return no image and the text response
                return None, text_response

            # convert()/copy() return an in-memory image, so the file handle
            # can be closed and the temp file deleted afterwards.
            with Image.open(result_path) as opened:
                if opened.mode == "RGBA":
                    result_img = opened.convert("RGB")
                else:
                    result_img = opened.copy()
            return [result_img], ""

        except Exception as e:
            raise gr.Error(f"Error: {e}", duration=5) from e
        finally:
            self._remove_file(input_path)
            self._remove_file(result_path)
128
+
129
+
130
def create_interface():
    """Build and return the Gradio Blocks app for the image editor.

    The layout is a header, two collapsed help accordions, an input column
    (image, API key, prompt, button), an output column (gallery plus a text
    box for text-only replies) and a set of clickable examples.
    """
    image_editor = ImageEditor()

    # Pass the stylesheet's *contents* rather than its file name: Gradio
    # versions that treat ``css`` purely as CSS source would otherwise
    # receive the literal text "style.css" and apply no styling.
    # The path is resolved against the current working directory.
    custom_css = None
    if os.path.isfile("style.css"):
        with open("style.css", encoding="utf-8") as css_file:
            custom_css = css_file.read()

    with gr.Blocks(css=custom_css) as app:
        # Header
        gr.HTML(
            """
            <div class="header-container">
            <div>
            <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
            </div>
            <div>
            <h1>My Image Editing App</h1>
            <p>Powered by Gradio⚡️ and Gemini |
            <a href="https://aistudio.google.com/apikey">Get an API Key</a></p>
            </div>
            </div>
            """
        )

        # API Configuration
        with gr.Accordion("⚠️ API Configuration ⚠️", open=False):
            gr.Markdown("""
            - **Note:** You need to provide a Gemini API key for image generation
            - Sometimes the model returns text instead of an image - try adjusting your prompt
            """)

        # Usage Instructions
        with gr.Accordion("📌 Usage Instructions", open=False):
            gr.Markdown("""
            ### How to Use
            - Upload an image (PNG format recommended)
            - Enter a prompt describing the edit you want
            - Click Generate to create your output
            - If text is returned instead of an image, it will appear in the text output area
            - ❌ **Do not use NSFW images!**
            """)

        # Main Content
        with gr.Row():
            # Input Column
            with gr.Column():
                image_input = gr.Image(
                    type="pil",
                    label="Upload Image",
                    image_mode="RGBA"
                )
                api_key_input = gr.Textbox(
                    lines=1,
                    placeholder="Enter Gemini API Key",
                    label="Gemini API Key",
                    type="password"
                )
                prompt_input = gr.Textbox(
                    lines=2,
                    placeholder="Describe the edit you want...",
                    label="Edit Prompt"
                )
                generate_btn = gr.Button("Generate Edit")

            # Output Column
            with gr.Column():
                output_gallery = gr.Gallery(label="Edited Image")
                output_text = gr.Textbox(
                    label="Text Output",
                    placeholder="Text response will appear here if no image is generated."
                )

        # Connect the interface
        generate_btn.click(
            fn=image_editor.process_image_and_prompt,
            inputs=[image_input, prompt_input, api_key_input],
            outputs=[output_gallery, output_text],
        )

        # Examples
        gr.Markdown("## Example Prompts")

        # One column per component in ``inputs`` below (image, prompt).
        # The API key is deliberately not part of the examples.
        examples = [
            ["data/1.webp", 'change text to "MY TEXT"'],
            ["data/2.webp", "remove the spoon from the image"],
            ["data/3.webp", 'change text to "Custom Text"'],
            ["data/1.jpg", "add cartoon style to the face"],
        ]

        gr.Examples(
            examples=examples,
            inputs=[image_input, prompt_input]
        )

    return app
222
+
223
# Script entry point: build the UI, cap the request queue at 50, then serve.
if __name__ == "__main__":
    app = create_interface()
    app.queue(max_size=50)
    app.launch()