Spaces:

Athspi
/

Gttg

Sleeping

App Files Community

Athspi committed on Mar 22

Commit

b0a339e

verified ·

1 Parent(s): 605bf7b

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -124

app.py CHANGED Viewed

@@ -1,120 +1,87 @@
-import base64
 import os
 import mimetypes
 from flask import Flask, render_template, request, jsonify
 from werkzeug.utils import secure_filename
 from google import genai
 from google.genai import types
-# Initialize Flask app
 app = Flask(__name__)
-# Set your Gemini API key via Hugging Face Spaces environment variables.
-# Do not include a default fallback; the environment must supply GEMINI_API_KEY.
 GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
 client = genai.Client(api_key=GEMINI_API_KEY)
-# Create necessary directories
 UPLOAD_FOLDER = 'uploads'
 RESULT_FOLDER = os.path.join('static')
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(RESULT_FOLDER, exist_ok=True)
-def analyze_object_removal_request(object_type):
-    """
-    Analyzes the object removal request using gemini-2.0-flash-lite to check if it's about people or animals.
-    Returns True if it's a person/animal removal, False otherwise.
-    """
-    model_text_check = "gemini-2.0-flash-lite"
-    contents_text_check = [
-        types.Content(
-            role="user",
-            parts=[
-                types.Part.from_text(text=f"Is '{object_type}' a person or animal? Answer yes or no."),
-            ],
-        ),
     ]
-    generate_content_config_text_check = types.GenerateContentConfig(
-        temperature=0.1, # Lower temperature for more deterministic yes/no answers
-        top_p=0.95,
-        top_k=40,
-        max_output_tokens=256, # Limit output tokens for quick analysis
-        response_mime_type="text/plain",
         system_instruction=[
-            types.Part.from_text(text="""You are a helpful AI assistant. Determine if the user's object removal request is about a person or animal. Respond with only 'yes' or 'no'."""),
         ],
     )
     try:
-        response_text_check = client.models.generate_content(
-            model=model_text_check,
-            contents=contents_text_check,
-            config=generate_content_config_text_check,
         )
-        if response_text_check.text:
-            lower_text = response_text_check.text.strip().lower()
-            if "yes" in lower_text:
-                return True  # It's likely a person or animal
-            elif "no" in lower_text:
-                return False # It's likely not a person or animal
-            else:
-                # If the response is unclear, err on the side of caution (treat as person/animal)
-                print(f"Warning: Unclear text analysis response: '{response_text_check.text}'. Treating as potential person/animal removal.")
-                return True # Be conservative
-        else:
-            print("Warning: No text response from text analysis model.")
-            return True # Be conservative if no response
     except Exception as e:
-        print(f"Error during text analysis: {e}")
-        return True # Be conservative on error
-def generate_gemini_output(object_type, image_data_url):
-    """
-    Generate output from Gemini by removing the specified object, with initial text analysis.
-    Expects the image_data_url to be a base64 data URL.
-    """
-    # Analyze the object type using gemini-2.0-flash-lite
-    if analyze_object_removal_request(object_type):
-        return "Sorry, I can't assist with removing people or animals.", None # Text result, no image
-    model_image_gen = "gemini-2.0-flash-exp-image-generation" # Switch to image generation model if not person/animal
-    files = []
-    # Decode the image data from the data URL (same as before)
-    if image_data_url:
-        try:
-            header, encoded = image_data_url.split(',', 1)
-            binary_data = base64.b64decode(encoded)
-            mime_type = header.split(':')[1].split(';')[0]
-            ext = mimetypes.guess_extension(mime_type) or ".png"
-            if ext not in ['.jpg', '.jpeg', '.png']:
-                raise ValueError("Invalid image format. Only JPG, JPEG, and PNG are supported.")
-            temp_filename = secure_filename("temp_image" + ext)
-            temp_filepath = os.path.join(UPLOAD_FOLDER, temp_filename)
-            with open(temp_filepath, "wb") as f:
-                f.write(binary_data)
-            uploaded_file = client.files.upload(file=temp_filepath)
-            files.append(uploaded_file)
-            os.remove(temp_filepath)
-        except (ValueError, base64.binascii.Error) as e:
-            raise ValueError(f"Invalid image data: {str(e)}") from e
-        except Exception as e:
-            raise ValueError(f"Error processing image: {str(e)}") from e
-    # Prepare content parts for Gemini (same as before)
-    parts = []
-    if files:
-        parts.append(types.Part.from_uri(file_uri=files[0].uri, mime_type=files[0].mime_type))
-    if object_type:
-        magic_prompt = f"Remove {object_type} from the image"
-        parts.append(types.Part.from_text(text=magic_prompt))
     contents = [types.Content(role="user", parts=parts)]
-    generate_content_config_image_gen = types.GenerateContentConfig( # Config for image generation model
         temperature=1,
         top_p=0.95,
         top_k=40,
@@ -124,18 +91,14 @@ def generate_gemini_output(object_type, image_data_url):
             types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
         ],
     )
-    result_text = None
     result_image = None
-    try:
-        for chunk in client.models.generate_content_stream(
-            model=model_image_gen, # Use image generation model here
-            contents=contents,
-            config=generate_content_config_image_gen,
-        ):
-            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
-                continue
             part = chunk.candidates[0].content.parts[0]
             if part.inline_data:
                 file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png"
@@ -144,14 +107,8 @@ def generate_gemini_output(object_type, image_data_url):
                 with open(result_image_path, "wb") as f:
                     f.write(part.inline_data.data)
                 result_image = result_image_path
-            else:
-                result_text = part.text
-    except genai.APIError as e:
-         raise RuntimeError(f"Gemini API Error: {str(e)}") from e
-    except Exception as e:
-        raise RuntimeError(f"An unexpected error occurred during Gemini processing: {str(e)}") from e
-    return result_text, result_image # May return text error or image path/None
 @app.route("/")
 def index():
@@ -163,20 +120,31 @@ def process():
         data = request.get_json(force=True)
         image_data = data.get("image")
         object_type = data.get("objectType", "").strip()
         if not image_data or not object_type:
-            return jsonify({"success": False, "message": "Missing image data or object type."}), 400
-        # Generate output using Gemini (now with text analysis first)
-        result_text, result_image = generate_gemini_output(object_type, image_data)
-        if result_text and not result_image: # Text result means error or text response
-            return jsonify({"success": False, "message": result_text}), 400 # Send back text error
-        if not result_image: # Still check for image failure if no text error
-            return jsonify({"success": False, "message": "Failed to generate image. The object may be too large or complex, or the image may not be suitable."}), 500
-        image_url = f"/static/{os.path.basename(result_image)}"
-        return jsonify({"success": True, "resultPath": image_url, "resultText": result_text}) # resultText might be None or text from image model
     except Exception as e:
         return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500

 import os
+import base64
 import mimetypes
 from flask import Flask, render_template, request, jsonify
 from werkzeug.utils import secure_filename
 from google import genai
 from google.genai import types
 app = Flask(__name__)
+# Initialize Gemini client
 GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
 client = genai.Client(api_key=GEMINI_API_KEY)
+# Configure upload folders
 UPLOAD_FOLDER = 'uploads'
 RESULT_FOLDER = os.path.join('static')
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(RESULT_FOLDER, exist_ok=True)
+def upload_image(image_data_url):
+    """Decode a base64 data URL, stage it on disk, and upload it to Gemini.
+
+    Args:
+        image_data_url: String holding a header and a base64 payload
+            separated by the first comma.
+
+    Returns:
+        Whatever ``client.files.upload`` returns for the staged file.
+
+    Raises:
+        ValueError: If the input is not a string, has no comma separator,
+            or the payload is not valid base64.
+    """
+    if not isinstance(image_data_url, str):
+        raise ValueError("Invalid image data")
+    try:
+        header, encoded = image_data_url.split(',', 1)
+    except ValueError:
+        raise ValueError("Invalid image data") from None
+    try:
+        binary_data = base64.b64decode(encoded)
+    except ValueError as e:
+        # binascii.Error is a ValueError subclass; report it the same way
+        # as a malformed data URL.
+        raise ValueError("Invalid image data") from e
+    ext = ".png" if "png" in header.lower() else ".jpg"
+    # A unique name per call keeps concurrent requests from overwriting
+    # each other's staged image.
+    temp_filename = secure_filename(f"temp_image_{os.urandom(8).hex()}{ext}")
+    temp_filepath = os.path.join(UPLOAD_FOLDER, temp_filename)
+    try:
+        with open(temp_filepath, "wb") as f:
+            f.write(binary_data)
+        return client.files.upload(file=temp_filepath)
+    finally:
+        # The local copy is only needed for the upload; remove it whether
+        # or not the upload succeeded.
+        if os.path.exists(temp_filepath):
+            os.remove(temp_filepath)
+def is_prohibited_request(uploaded_file, object_type):
+    """Ask gemini-2.0-flash-lite whether the request removes a person or animal.
+
+    Args:
+        uploaded_file: Uploaded Gemini file; its ``uri`` and ``mime_type``
+            are sent to the model alongside the request text.
+        object_type: User-supplied description of what to remove.
+
+    Returns:
+        False only when the model clearly answers "no". Every other outcome
+        (a "yes", an empty or unrecognised reply, or an API error) returns
+        True, so the guard fails closed instead of letting the request
+        through unchecked.
+    """
+    model = "gemini-2.0-flash-lite"
+    parts = [
+        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
+        types.Part.from_text(text=f"Remove {object_type}")
     ]
+    contents = [types.Content(role="user", parts=parts)]
+    generate_content_config = types.GenerateContentConfig(
         system_instruction=[
+            types.Part.from_text(text="""Determine if the user wants to remove a person or animal.
+            Respond ONLY with 'Yes' or 'No'. Consider these examples:
+            - Remove person → Yes
+            - Remove dog → Yes
+            - Remove sunglasses → No
+            - Remove background → No""")
         ],
+        temperature=0.0,
+        max_output_tokens=1,
     )
     try:
+        response = client.models.generate_content(
+            model=model,
+            contents=contents,
+            config=generate_content_config
         )
     except Exception as e:
+        print(f"Prohibition check error: {str(e)}")
+        # The check could not run, so treat the request as prohibited rather
+        # than waving it through.
+        return True
+    # candidates, content, parts and text may each be missing on a blocked
+    # or truncated reply, so walk the chain defensively.
+    candidates = response.candidates or []
+    content = candidates[0].content if candidates else None
+    reply_parts = (content.parts if content else None) or []
+    answer = (reply_parts[0].text or "") if reply_parts else ""
+    # Tolerate case, surrounding whitespace and trailing punctuation.
+    normalized = answer.strip().lower().rstrip(".!")
+    return normalized != "no"
+def generate_gemini_output(object_type, uploaded_file):
+    """Generate the edited image using gemini-2.0-flash-exp-image-generation.
+
+    Args:
+        object_type: What to remove; interpolated into the prompt text.
+        uploaded_file: Uploaded Gemini file whose ``uri`` and ``mime_type``
+            are sent as the image part of the request.
+
+    Returns:
+        The path the last inline image chunk was written to, or None when
+        the stream carried no inline image data.
+    """
+    model = "gemini-2.0-flash-exp-image-generation"
+    parts = [
+        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
+        types.Part.from_text(text=f"Remove {object_type} from the image")
+    ]
     contents = [types.Content(role="user", parts=parts)]
+    generate_content_config = types.GenerateContentConfig(
         temperature=1,
         top_p=0.95,
         top_k=40,
             types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
         ],
     )
     result_image = None
+    for chunk in client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    ):
+        candidate = chunk.candidates[0] if chunk.candidates else None
+        # A chunk may carry a candidate with no content; skip it instead of
+        # raising AttributeError on ``.parts``.
+        if candidate and candidate.content and candidate.content.parts:
             part = chunk.candidates[0].content.parts[0]
             if part.inline_data:
                 file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png"
                 with open(result_image_path, "wb") as f:
                     f.write(part.inline_data.data)
                 result_image = result_image_path
+    return result_image
 @app.route("/")
 def index():
         data = request.get_json(force=True)
         image_data = data.get("image")
         object_type = data.get("objectType", "").strip()
         if not image_data or not object_type:
+            return jsonify({"success": False, "message": "Missing required data"}), 400
+        # Upload image once
+        uploaded_file = upload_image(image_data)
+        # Check for prohibited requests
+        if is_prohibited_request(uploaded_file, object_type):
+            return jsonify({
+                "success": False,
+                "message": "Sorry, I can't assist with removing people or animals."
+            }), 400
+        # Generate output if allowed
+        result_image = generate_gemini_output(object_type, uploaded_file)
+        if not result_image:
+            return jsonify({"success": False, "message": "Failed to generate image"}), 500
+        return jsonify({
+            "success": True,
+            "resultPath": f"/static/{os.path.basename(result_image)}"
+        })
     except Exception as e:
         return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500