Athspi committed on
Commit
b0a339e
·
verified ·
1 Parent(s): 605bf7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -124
app.py CHANGED
@@ -1,120 +1,87 @@
1
- import base64
2
  import os
 
3
  import mimetypes
4
  from flask import Flask, render_template, request, jsonify
5
  from werkzeug.utils import secure_filename
6
  from google import genai
7
  from google.genai import types
8
 
9
- # Initialize Flask app
10
  app = Flask(__name__)
11
 
12
- # Set your Gemini API key via Hugging Face Spaces environment variables.
13
- # Do not include a default fallback; the environment must supply GEMINI_API_KEY.
14
  GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
15
  client = genai.Client(api_key=GEMINI_API_KEY)
16
 
17
- # Create necessary directories
18
  UPLOAD_FOLDER = 'uploads'
19
  RESULT_FOLDER = os.path.join('static')
20
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
21
  os.makedirs(RESULT_FOLDER, exist_ok=True)
22
 
23
def analyze_object_removal_request(object_type):
    """Ask gemini-2.0-flash-lite whether ``object_type`` names a person or animal.

    Args:
        object_type: Free-text description of the object the user wants removed.

    Returns:
        True when the model answers "yes", or when the answer is missing,
        unclear, or the API call fails (the check is deliberately conservative).
        False only when the model clearly answers "no".
    """
    import re  # local import: only needed for tokenizing the model's answer

    model_text_check = "gemini-2.0-flash-lite"
    contents_text_check = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=f"Is '{object_type}' a person or animal? Answer yes or no."),
            ],
        ),
    ]
    generate_content_config_text_check = types.GenerateContentConfig(
        temperature=0.1,  # Lower temperature for more deterministic yes/no answers
        top_p=0.95,
        top_k=40,
        max_output_tokens=256,  # Limit output tokens for quick analysis
        response_mime_type="text/plain",
        system_instruction=[
            types.Part.from_text(text="""You are a helpful AI assistant. Determine if the user's object removal request is about a person or animal. Respond with only 'yes' or 'no'."""),
        ],
    )

    try:
        response_text_check = client.models.generate_content(
            model=model_text_check,
            contents=contents_text_check,
            config=generate_content_config_text_check,
        )
        answer = response_text_check.text
        if not answer:
            print("Warning: No text response from text analysis model.")
            return True  # Be conservative if no response

        # Match whole words only: a plain substring test would treat
        # "unknown" or "cannot" as a "no" and wrongly allow the request.
        words = set(re.findall(r"[a-z]+", answer.lower()))
        if "yes" in words:
            return True  # It's likely a person or animal
        if "no" in words:
            return False  # It's likely not a person or animal

        # Unclear answer: err on the side of caution (treat as person/animal).
        print(f"Warning: Unclear text analysis response: '{answer}'. Treating as potential person/animal removal.")
        return True
    except Exception as e:
        print(f"Error during text analysis: {e}")
        return True  # Be conservative on error
70
-
71
- def generate_gemini_output(object_type, image_data_url):
72
- """
73
- Generate output from Gemini by removing the specified object, with initial text analysis.
74
- Expects the image_data_url to be a base64 data URL.
75
- """
76
-
77
- # Analyze the object type using gemini-2.0-flash-lite
78
- if analyze_object_removal_request(object_type):
79
- return "Sorry, I can't assist with removing people or animals.", None # Text result, no image
80
-
81
- model_image_gen = "gemini-2.0-flash-exp-image-generation" # Switch to image generation model if not person/animal
82
- files = []
83
-
84
- # Decode the image data from the data URL (same as before)
85
- if image_data_url:
86
- try:
87
- header, encoded = image_data_url.split(',', 1)
88
- binary_data = base64.b64decode(encoded)
89
- mime_type = header.split(':')[1].split(';')[0]
90
- ext = mimetypes.guess_extension(mime_type) or ".png"
91
- if ext not in ['.jpg', '.jpeg', '.png']:
92
- raise ValueError("Invalid image format. Only JPG, JPEG, and PNG are supported.")
93
-
94
- temp_filename = secure_filename("temp_image" + ext)
95
- temp_filepath = os.path.join(UPLOAD_FOLDER, temp_filename)
96
- with open(temp_filepath, "wb") as f:
97
- f.write(binary_data)
98
- uploaded_file = client.files.upload(file=temp_filepath)
99
- files.append(uploaded_file)
100
- os.remove(temp_filepath)
101
-
102
- except (ValueError, base64.binascii.Error) as e:
103
- raise ValueError(f"Invalid image data: {str(e)}") from e
104
- except Exception as e:
105
- raise ValueError(f"Error processing image: {str(e)}") from e
106
-
107
- # Prepare content parts for Gemini (same as before)
108
- parts = []
109
- if files:
110
- parts.append(types.Part.from_uri(file_uri=files[0].uri, mime_type=files[0].mime_type))
111
- if object_type:
112
- magic_prompt = f"Remove {object_type} from the image"
113
- parts.append(types.Part.from_text(text=magic_prompt))
114
-
115
  contents = [types.Content(role="user", parts=parts)]
116
-
117
- generate_content_config_image_gen = types.GenerateContentConfig( # Config for image generation model
118
  temperature=1,
119
  top_p=0.95,
120
  top_k=40,
@@ -124,18 +91,14 @@ def generate_gemini_output(object_type, image_data_url):
124
  types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
125
  ],
126
  )
127
-
128
- result_text = None
129
  result_image = None
130
-
131
- try:
132
- for chunk in client.models.generate_content_stream(
133
- model=model_image_gen, # Use image generation model here
134
- contents=contents,
135
- config=generate_content_config_image_gen,
136
- ):
137
- if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
138
- continue
139
  part = chunk.candidates[0].content.parts[0]
140
  if part.inline_data:
141
  file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png"
@@ -144,14 +107,8 @@ def generate_gemini_output(object_type, image_data_url):
144
  with open(result_image_path, "wb") as f:
145
  f.write(part.inline_data.data)
146
  result_image = result_image_path
147
- else:
148
- result_text = part.text
149
- except genai.APIError as e:
150
- raise RuntimeError(f"Gemini API Error: {str(e)}") from e
151
- except Exception as e:
152
- raise RuntimeError(f"An unexpected error occurred during Gemini processing: {str(e)}") from e
153
-
154
- return result_text, result_image # May return text error or image path/None
155
 
156
  @app.route("/")
157
  def index():
@@ -163,20 +120,31 @@ def process():
163
  data = request.get_json(force=True)
164
  image_data = data.get("image")
165
  object_type = data.get("objectType", "").strip()
 
166
  if not image_data or not object_type:
167
- return jsonify({"success": False, "message": "Missing image data or object type."}), 400
168
-
169
- # Generate output using Gemini (now with text analysis first)
170
- result_text, result_image = generate_gemini_output(object_type, image_data)
171
-
172
- if result_text and not result_image: # Text result means error or text response
173
- return jsonify({"success": False, "message": result_text}), 400 # Send back text error
174
-
175
- if not result_image: # Still check for image failure if no text error
176
- return jsonify({"success": False, "message": "Failed to generate image. The object may be too large or complex, or the image may not be suitable."}), 500
177
-
178
- image_url = f"/static/{os.path.basename(result_image)}"
179
- return jsonify({"success": True, "resultPath": image_url, "resultText": result_text}) # resultText might be None or text from image model
 
 
 
 
 
 
 
 
 
 
180
  except Exception as e:
181
  return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500
182
 
 
 
1
  import os
2
+ import base64
3
  import mimetypes
4
  from flask import Flask, render_template, request, jsonify
5
  from werkzeug.utils import secure_filename
6
  from google import genai
7
  from google.genai import types
8
 
 
9
  app = Flask(__name__)
10
 
11
+ # Initialize Gemini client
 
12
  GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
13
  client = genai.Client(api_key=GEMINI_API_KEY)
14
 
15
+ # Configure upload folders
16
  UPLOAD_FOLDER = 'uploads'
17
  RESULT_FOLDER = os.path.join('static')
18
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
19
  os.makedirs(RESULT_FOLDER, exist_ok=True)
20
 
21
def upload_image(image_data_url):
    """Decode a base64 data URL, upload the image to Gemini, and return the upload.

    The payload is written to a uniquely named temporary file inside
    ``UPLOAD_FOLDER`` (so concurrent requests cannot overwrite each other)
    and the file is always removed afterwards, whether or not the upload
    succeeds.

    Args:
        image_data_url: String of the form ``"<header>,<base64 payload>"``.

    Returns:
        Whatever ``client.files.upload`` returns for the temporary file.

    Raises:
        ValueError: If the input is not a string, has no comma separator,
            is not valid base64, or decodes to an empty payload.
    """
    import tempfile  # local import: only this helper needs it

    if not isinstance(image_data_url, str):
        raise ValueError("Invalid image data")

    try:
        header, encoded = image_data_url.split(',', 1)
    except ValueError:
        raise ValueError("Invalid image data") from None

    try:
        binary_data = base64.b64decode(encoded)
    except ValueError as e:  # binascii.Error is a ValueError subclass
        raise ValueError("Invalid image data") from e
    if not binary_data:
        raise ValueError("Invalid image data")

    # The suffix matters: it is the only hint about the image type that the
    # temporary file carries.
    ext = ".png" if "png" in header.lower() else ".jpg"

    temp_file = tempfile.NamedTemporaryFile(
        dir=UPLOAD_FOLDER, prefix="temp_image_", suffix=ext, delete=False
    )
    temp_filepath = temp_file.name
    try:
        # Close the handle before uploading so the file can be reopened by path.
        with temp_file:
            temp_file.write(binary_data)
        return client.files.upload(file=temp_filepath)
    finally:
        try:
            os.remove(temp_filepath)
        except FileNotFoundError:
            pass
37
+
38
def is_prohibited_request(uploaded_file, object_type):
    """Check with gemini-2.0-flash-lite whether the request removes a person/animal.

    Args:
        uploaded_file: Uploaded file object; its ``uri`` and ``mime_type``
            attributes are sent to the model alongside the prompt.
        object_type: Free-text description of the object to remove.

    Returns:
        True if the model's answer starts with "yes" (case-insensitive);
        False otherwise, including when the API call fails or the response
        carries no text.
    """
    model = "gemini-2.0-flash-lite"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}"),
    ]

    contents = [types.Content(role="user", parts=parts)]

    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text=(
                "Determine if the user wants to remove a person or animal.\n"
                "Respond ONLY with 'Yes' or 'No'. Consider these examples:\n"
                "- Remove person → Yes\n"
                "- Remove dog → Yes\n"
                "- Remove sunglasses → No\n"
                "- Remove background → No"
            )),
        ],
        temperature=0.0,
        max_output_tokens=1,
    )

    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
    except Exception as e:
        # NOTE(review): this check fails open, so an API error lets the
        # request through. Confirm that is the intended policy.
        print(f"Prohibition check error: {str(e)}")
        return False

    candidates = response.candidates or []
    if not candidates:
        return False
    content = candidates[0].content
    answer_parts = content.parts if content else None
    if not answer_parts:
        return False

    # A part may carry no text; treat that as "no answer" rather than letting
    # an AttributeError be raised on None.
    answer = (answer_parts[0].text or "").strip().lower()
    # Prefix match tolerates trailing punctuation such as "Yes.".
    return answer.startswith("yes")
73
+
74
+ def generate_gemini_output(object_type, uploaded_file):
75
+ """Generate image using gemini-2.0-flash-exp-image-generation"""
76
+ model = "gemini-2.0-flash-exp-image-generation"
77
+ parts = [
78
+ types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
79
+ types.Part.from_text(text=f"Remove {object_type} from the image")
80
+ ]
81
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  contents = [types.Content(role="user", parts=parts)]
83
+
84
+ generate_content_config = types.GenerateContentConfig(
85
  temperature=1,
86
  top_p=0.95,
87
  top_k=40,
 
91
  types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
92
  ],
93
  )
94
+
 
95
  result_image = None
96
+ for chunk in client.models.generate_content_stream(
97
+ model=model,
98
+ contents=contents,
99
+ config=generate_content_config,
100
+ ):
101
+ if chunk.candidates and chunk.candidates[0].content.parts:
 
 
 
102
  part = chunk.candidates[0].content.parts[0]
103
  if part.inline_data:
104
  file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png"
 
107
  with open(result_image_path, "wb") as f:
108
  f.write(part.inline_data.data)
109
  result_image = result_image_path
110
+
111
+ return result_image
 
 
 
 
 
 
112
 
113
  @app.route("/")
114
  def index():
 
120
  data = request.get_json(force=True)
121
  image_data = data.get("image")
122
  object_type = data.get("objectType", "").strip()
123
+
124
  if not image_data or not object_type:
125
+ return jsonify({"success": False, "message": "Missing required data"}), 400
126
+
127
+ # Upload image once
128
+ uploaded_file = upload_image(image_data)
129
+
130
+ # Check for prohibited requests
131
+ if is_prohibited_request(uploaded_file, object_type):
132
+ return jsonify({
133
+ "success": False,
134
+ "message": "Sorry, I can't assist with removing people or animals."
135
+ }), 400
136
+
137
+ # Generate output if allowed
138
+ result_image = generate_gemini_output(object_type, uploaded_file)
139
+
140
+ if not result_image:
141
+ return jsonify({"success": False, "message": "Failed to generate image"}), 500
142
+
143
+ return jsonify({
144
+ "success": True,
145
+ "resultPath": f"/static/{os.path.basename(result_image)}"
146
+ })
147
+
148
  except Exception as e:
149
  return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500
150