Spaces:

Athspi
/

Gttg

Sleeping

App Files Files Community

Athspi committed on Mar 22

Commit

7b62da2

verified ·

1 Parent(s): 5a42ff8

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -42

app.py CHANGED Viewed

@@ -35,51 +35,146 @@ def upload_image(image_data_url):
     return client.files.upload(file=temp_filepath)
-def is_prohibited_request(uploaded_file, object_type):
-    """Check if request matches prohibited removal cases"""
-    object_type = object_type.lower()
-    # Prohibited cases
-    prohibited_requests = [
-        "remove sunglasses" in object_type and "table" not in object_type,  # ❌ when worn
-        "remove phone" in object_type and "hand" in object_type,           # ❌ when in hand
-        "remove eyes" in object_type                                      # ❌ remove eyes
     ]
-    # Allowed cases
-    allowed_requests = [
-        "remove sunglasses" in object_type and "table" in object_type,     # ✅ when on table
-        "remove car" in object_type,                                      # ✅ remove car
-        "remove background" in object_type                                # ✅ remove background
     ]
-    # Check for person/animal removal
-    person_animal_check = "remove person" in object_type or "remove animal" in object_type or \
-                         "remove dog" in object_type or "remove cat" in object_type
-    return any(prohibited_requests) or person_animal_check
 def generate_gemini_output(object_type, uploaded_file):
-    """Generate image using gemini-2.0-flash-exp-image-generation"""
     model = "gemini-2.0-flash-exp-image-generation"
     parts = [
         types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
         types.Part.from_text(text=f"Remove {object_type} from the image")
     ]
     contents = [types.Content(role="user", parts=parts)]
     generate_content_config = types.GenerateContentConfig(
         temperature=1,
         top_p=0.95,
         top_k=40,
         max_output_tokens=8192,
         response_modalities=["image", "text"],
-        safety_settings=[
-            types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
-        ],
     )
     result_image = None
     for chunk in client.models.generate_content_stream(
         model=model,
@@ -95,7 +190,6 @@ def generate_gemini_output(object_type, uploaded_file):
                 with open(result_image_path, "wb") as f:
                     f.write(part.inline_data.data)
                 result_image = result_image_path
     return result_image
 @app.route("/")
@@ -107,27 +201,49 @@ def process():
     try:
         data = request.get_json(force=True)
         image_data = data.get("image")
-        object_type = data.get("objectType", "").strip().lower()
         if not image_data or not object_type:
             return jsonify({"success": False, "message": "Missing required data"}), 400
-        # Upload image once
         uploaded_file = upload_image(image_data)
-        # Check for prohibited requests
-        if is_prohibited_request(uploaded_file, object_type):
-            error_message = "Sorry, I can't assist with this request."
-            if "person" in object_type or "animal" in object_type or "cat" in object_type or "dog" in object_type:
-                error_message = "Sorry, I can't assist with removing people or animals."
-            return jsonify({"success": False, "message": error_message}), 400
-        # Generate output if allowed
         result_image = generate_gemini_output(object_type, uploaded_file)
         if not result_image:
             return jsonify({"success": False, "message": "Failed to generate image"}), 500
         return jsonify({
             "success": True,
             "resultPath": f"/static/{os.path.basename(result_image)}"

     return client.files.upload(file=temp_filepath)
def normalize_object_type(object_type: str) -> str:
    """Reduce a removal request to the bare object description.

    The text is lower-cased and split on whitespace. Punctuation is trimmed
    from both ends of every word, action verbs (remove/delete/erase/
    eliminate) are dropped, and any leading article (the/a/an) is removed,
    so that "Remove the sunglasses." and "sunglasses" normalize to the same
    value.

    Args:
        object_type: Raw request text, e.g. "Remove sunglasses".

    Returns:
        The normalized object description. If nothing is left after
        filtering (for example the input was just "remove"), the
        lower-cased input with its whitespace collapsed is returned instead.
    """
    import string

    action_verbs = {'remove', 'delete', 'erase', 'eliminate'}
    articles = {'the', 'a', 'an'}

    lowered = object_type.lower()
    # Trim punctuation at word edges only, so inner characters such as the
    # hyphen in "t-shirt" survive.
    tokens = [word.strip(string.punctuation) for word in lowered.split()]
    kept = [token for token in tokens if token and token not in action_verbs]

    # Only leading articles are dropped; articles inside a phrase
    # ("cat on the table") are part of the description.
    while kept and kept[0] in articles:
        kept.pop(0)

    if not kept:
        return ' '.join(lowered.split())
    return ' '.join(kept)
def check_if_person_animal(uploaded_file, object_type):
    """Ask Gemini whether the removal request targets a person or an animal.

    The uploaded image (referenced by URI) and the text
    ``Remove {object_type}`` are sent to ``gemini-2.0-flash-lite`` under a
    system instruction that asks for a bare 'Yes' or 'No'.

    Args:
        uploaded_file: Uploaded image handle; its ``uri`` and ``mime_type``
            attributes are read.
        object_type: Description of the object the user wants removed.

    Returns:
        True when the model's reply starts with "yes" (case-insensitive).
        False for any other reply, for an empty response, or when the call
        raises.
    """
    model = "gemini-2.0-flash-lite"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}")
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text="""Determine if the user wants to remove a person or animal.
            Respond ONLY with 'Yes' or 'No'. Examples:
            - Remove person → Yes
            - Remove dog → Yes
            - Remove sunglasses → No""")
        ],
        temperature=0.0,
        max_output_tokens=1,
    )
    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config
        )
        candidates = response.candidates
        if not candidates:
            return False
        content = candidates[0].content
        if content is None or not content.parts:
            return False
        # The first part may carry no text; treat that as an empty answer.
        answer = content.parts[0].text or ""
        # Prefix match so replies such as "Yes." still count as Yes.
        return answer.strip().lower().startswith("yes")
    except Exception as e:
        # NOTE(review): errors are reported as False ("No"); confirm that
        # failing open is the intended policy for this check.
        print(f"Error checking person/animal: {str(e)}")
        return False
def check_other_entities(uploaded_file, object_type):
    """Ask Gemini whether the image shows people/animals besides the target.

    The uploaded image (referenced by URI) and the text
    ``Remove {object_type}`` are sent to ``gemini-2.0-flash-lite``. The
    system instruction, which embeds ``object_type``, asks for a bare
    'Yes' or 'No'.

    Args:
        uploaded_file: Uploaded image handle; its ``uri`` and ``mime_type``
            attributes are read.
        object_type: Description of the object the user wants removed.

    Returns:
        True when the model's reply starts with "yes" (case-insensitive).
        False for any other reply, for an empty response, or when the call
        raises.
    """
    model = "gemini-2.0-flash-lite"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}")
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text=f"""Analyze this image. Are there any other people or animals
            besides the {object_type}? Respond ONLY with 'Yes' or 'No'.""")
        ],
        temperature=0.0,
        max_output_tokens=1,
    )
    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config
        )
        candidates = response.candidates
        if not candidates:
            return False
        content = candidates[0].content
        if content is None or not content.parts:
            return False
        # The first part may carry no text; treat that as an empty answer.
        answer = content.parts[0].text or ""
        # Prefix match so replies such as "Yes." still count as Yes.
        return answer.strip().lower().startswith("yes")
    except Exception as e:
        # NOTE(review): errors are reported as False ("No"); confirm that
        # failing open is the intended policy for this check.
        print(f"Error checking other entities: {str(e)}")
        return False
def check_sunglasses_state(uploaded_file):
    """Ask Gemini whether sunglasses are being worn in the image.

    The uploaded image (referenced by URI) and a fixed question are sent to
    ``gemini-2.0-flash-lite`` under a system instruction that asks for a
    bare 'Yes' or 'No'.

    Args:
        uploaded_file: Uploaded image handle; its ``uri`` and ``mime_type``
            attributes are read.

    Returns:
        True when the model's reply starts with "yes" (case-insensitive).
        False for any other reply, for an empty response, or when the call
        raises.
    """
    model = "gemini-2.0-flash-lite"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text="Are sunglasses being worn in this image?")
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text="Respond ONLY with 'Yes' or 'No'.")
        ],
        temperature=0.0,
        max_output_tokens=1,
    )
    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config
        )
        candidates = response.candidates
        if not candidates:
            return False
        content = candidates[0].content
        if content is None or not content.parts:
            return False
        # The first part may carry no text; treat that as an empty answer.
        answer = content.parts[0].text or ""
        # Prefix match so replies such as "Yes." still count as Yes.
        return answer.strip().lower().startswith("yes")
    except Exception as e:
        # NOTE(review): errors are reported as False ("No"); confirm that
        # failing open is the intended policy for this check.
        print(f"Error checking sunglasses: {str(e)}")
        return False
def check_phone_state(uploaded_file):
    """Ask Gemini whether a phone is being held in a hand in the image.

    The uploaded image (referenced by URI) and a fixed question are sent to
    ``gemini-2.0-flash-lite`` under a system instruction that asks for a
    bare 'Yes' or 'No'.

    Args:
        uploaded_file: Uploaded image handle; its ``uri`` and ``mime_type``
            attributes are read.

    Returns:
        True when the model's reply starts with "yes" (case-insensitive).
        False for any other reply, for an empty response, or when the call
        raises.
    """
    model = "gemini-2.0-flash-lite"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text="Is a phone being held in hand?")
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text="Respond ONLY with 'Yes' or 'No'.")
        ],
        temperature=0.0,
        max_output_tokens=1,
    )
    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config
        )
        candidates = response.candidates
        if not candidates:
            return False
        content = candidates[0].content
        if content is None or not content.parts:
            return False
        # The first part may carry no text; treat that as an empty answer.
        answer = content.parts[0].text or ""
        # Prefix match so replies such as "Yes." still count as Yes.
        return answer.strip().lower().startswith("yes")
    except Exception as e:
        # NOTE(review): errors are reported as False ("No"); confirm that
        # failing open is the intended policy for this check.
        print(f"Error checking phone: {str(e)}")
        return False
 def generate_gemini_output(object_type, uploaded_file):
+    """Generate image using Gemini"""
     model = "gemini-2.0-flash-exp-image-generation"
     parts = [
         types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
         types.Part.from_text(text=f"Remove {object_type} from the image")
     ]
     contents = [types.Content(role="user", parts=parts)]
     generate_content_config = types.GenerateContentConfig(
         temperature=1,
         top_p=0.95,
         top_k=40,
         max_output_tokens=8192,
         response_modalities=["image", "text"],
+        safety_settings=[types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF")],
     )
     result_image = None
     for chunk in client.models.generate_content_stream(
         model=model,
                 with open(result_image_path, "wb") as f:
                     f.write(part.inline_data.data)
                 result_image = result_image_path
     return result_image
 @app.route("/")
     try:
         data = request.get_json(force=True)
         image_data = data.get("image")
+        object_type = data.get("objectType", "").strip()
         if not image_data or not object_type:
             return jsonify({"success": False, "message": "Missing required data"}), 400
         uploaded_file = upload_image(image_data)
+        normalized_object = normalize_object_type(object_type)
+        # Prohibited categories check
+        if normalized_object == 'eyes':
+            return jsonify({
+                "success": False,
+                "message": "Sorry, I can't assist with removing eyes."
+            }), 400
+        # State checks
+        if normalized_object == 'sunglasses':
+            if check_sunglasses_state(uploaded_file):
+                return jsonify({
+                    "success": False,
+                    "message": "Can't remove sunglasses while being worn."
+                }), 400
+        if normalized_object == 'phone':
+            if check_phone_state(uploaded_file):
+                return jsonify({
+                    "success": False,
+                    "message": "Can't remove phones while being held."
+                }), 400
+        # Person/animal checks
+        if check_if_person_animal(uploaded_file, normalized_object):
+            if check_other_entities(uploaded_file, normalized_object):
+                return jsonify({
+                    "success": False,
+                    "message": "Can't remove people/animals when others are present."
+                }), 400
+        # Generate output
         result_image = generate_gemini_output(object_type, uploaded_file)
         if not result_image:
             return jsonify({"success": False, "message": "Failed to generate image"}), 500
         return jsonify({
             "success": True,
             "resultPath": f"/static/{os.path.basename(result_image)}"