Athspi committed
Commit 605bf7b · verified · 1 Parent(s): c90c576

Update app.py

Files changed (1)
1. app.py +103 -55
app.py CHANGED
@@ -1,5 +1,5 @@
-import os
 import base64
+import os
 import mimetypes
 from flask import Flask, render_template, request, jsonify
 from werkzeug.utils import secure_filename
@@ -9,7 +9,8 @@ from google.genai import types
 # Initialize Flask app
 app = Flask(__name__)
 
-# Read the Gemini API key from environment variables (set in Hugging Face Spaces)
+# Set your Gemini API key via Hugging Face Spaces environment variables.
+# Do not include a default fallback; the environment must supply GEMINI_API_KEY.
 GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
 client = genai.Client(api_key=GEMINI_API_KEY)
 
@@ -19,76 +20,119 @@ RESULT_FOLDER = os.path.join('static')
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(RESULT_FOLDER, exist_ok=True)
 
+def analyze_object_removal_request(object_type):
+    """
+    Analyzes the object removal request using gemini-2.0-flash-lite to check if it's about people or animals.
+    Returns True if it's a person/animal removal, False otherwise.
+    """
+    model_text_check = "gemini-2.0-flash-lite"
+    contents_text_check = [
+        types.Content(
+            role="user",
+            parts=[
+                types.Part.from_text(text=f"Is '{object_type}' a person or animal? Answer yes or no."),
+            ],
+        ),
+    ]
+    generate_content_config_text_check = types.GenerateContentConfig(
+        temperature=0.1,  # Lower temperature for more deterministic yes/no answers
+        top_p=0.95,
+        top_k=40,
+        max_output_tokens=256,  # Limit output tokens for quick analysis
+        response_mime_type="text/plain",
+        system_instruction=[
+            types.Part.from_text(text="""You are a helpful AI assistant. Determine if the user's object removal request is about a person or animal. Respond with only 'yes' or 'no'."""),
+        ],
+    )
+
+    try:
+        response_text_check = client.models.generate_content(
+            model=model_text_check,
+            contents=contents_text_check,
+            config=generate_content_config_text_check,
+        )
+        if response_text_check.text:
+            lower_text = response_text_check.text.strip().lower()
+            if "yes" in lower_text:
+                return True  # It's likely a person or animal
+            elif "no" in lower_text:
+                return False  # It's likely not a person or animal
+            else:
+                # If the response is unclear, err on the side of caution (treat as person/animal)
+                print(f"Warning: Unclear text analysis response: '{response_text_check.text}'. Treating as potential person/animal removal.")
+                return True  # Be conservative
+        else:
+            print("Warning: No text response from text analysis model.")
+            return True  # Be conservative if no response
+    except Exception as e:
+        print(f"Error during text analysis: {e}")
+        return True  # Be conservative on error
+
 def generate_gemini_output(object_type, image_data_url):
     """
-    Generate output from Gemini by removing the specified object.
+    Generate output from Gemini by removing the specified object, with initial text analysis.
    Expects the image_data_url to be a base64 data URL.
     """
-    model = "gemini-2.0-flash-lite"  # Use the lite model for text-based responses
+
+    # Analyze the object type using gemini-2.0-flash-lite
+    if analyze_object_removal_request(object_type):
+        return "Sorry, I can't assist with removing people or animals.", None  # Text result, no image
+
+    model_image_gen = "gemini-2.0-flash-exp-image-generation"  # Switch to image generation model if not person/animal
     files = []
 
-    # Decode the image data from the data URL
+    # Decode the image data from the data URL (same as before)
     if image_data_url:
         try:
             header, encoded = image_data_url.split(',', 1)
-        except ValueError:
-            raise ValueError("Invalid image data")
-        binary_data = base64.b64decode(encoded)
-        # Determine file extension from header
-        ext = ".png" if "png" in header.lower() else ".jpg"
-        temp_filename = secure_filename("temp_image" + ext)
-        temp_filepath = os.path.join(UPLOAD_FOLDER, temp_filename)
-        with open(temp_filepath, "wb") as f:
-            f.write(binary_data)
-        # Upload file to Gemini
-        uploaded_file = client.files.upload(file=temp_filepath)
-        files.append(uploaded_file)
-
-    # Prepare content parts for Gemini
+            binary_data = base64.b64decode(encoded)
+            mime_type = header.split(':')[1].split(';')[0]
+            ext = mimetypes.guess_extension(mime_type) or ".png"
+            if ext not in ['.jpg', '.jpeg', '.png']:
+                raise ValueError("Invalid image format. Only JPG, JPEG, and PNG are supported.")
+
+            temp_filename = secure_filename("temp_image" + ext)
+            temp_filepath = os.path.join(UPLOAD_FOLDER, temp_filename)
+            with open(temp_filepath, "wb") as f:
+                f.write(binary_data)
+            uploaded_file = client.files.upload(file=temp_filepath)
+            files.append(uploaded_file)
+            os.remove(temp_filepath)
+
+        except (ValueError, base64.binascii.Error) as e:
+            raise ValueError(f"Invalid image data: {str(e)}") from e
+        except Exception as e:
+            raise ValueError(f"Error processing image: {str(e)}") from e
+
+    # Prepare content parts for Gemini (same as before)
     parts = []
     if files:
         parts.append(types.Part.from_uri(file_uri=files[0].uri, mime_type=files[0].mime_type))
     if object_type:
-        # Gemini magic prompt: instruct the model to remove the specified object
         magic_prompt = f"Remove {object_type} from the image"
         parts.append(types.Part.from_text(text=magic_prompt))
 
     contents = [types.Content(role="user", parts=parts)]
 
-    generate_content_config = types.GenerateContentConfig(
+    generate_content_config_image_gen = types.GenerateContentConfig(  # Config for image generation model
         temperature=1,
         top_p=0.95,
         top_k=40,
         max_output_tokens=8192,
-        response_mime_type="text/plain",
-        system_instruction=[
-            types.Part.from_text(text="""Your AI finds user requests about removing objects from images.
-If the user asks to remove a person or animal, respond with 'No'."""),
+        response_modalities=["image", "text"],
+        safety_settings=[
+            types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
         ],
     )
 
     result_text = None
+    result_image = None
 
-    # Stream output from Gemini API
-    for chunk in client.models.generate_content_stream(
-        model=model,
-        contents=contents,
-        config=generate_content_config,
-    ):
-        if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
-            continue
-        part = chunk.candidates[0].content.parts[0]
-        if part.text:
-            result_text = part.text
-
-    # If the response is "No", switch to the image generation model
-    if result_text and "no" in result_text.lower():
-        model = "gemini-2.0-flash-exp-image-generation"
-        generate_content_config.response_modalities = ["image", "text"]
+    try:
         for chunk in client.models.generate_content_stream(
-            model=model,
+            model=model_image_gen,  # Use image generation model here
             contents=contents,
-            config=generate_content_config,
+            config=generate_content_config_image_gen,
         ):
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
@@ -99,38 +143,42 @@ If the user asks to remove a person or animal, respond with 'No'."""),
                 result_image_path = os.path.join(RESULT_FOLDER, output_filename)
                 with open(result_image_path, "wb") as f:
                     f.write(part.inline_data.data)
-                result_image = result_image_path  # Path relative to static folder
-                return result_text, result_image
+                result_image = result_image_path
+            else:
+                result_text = part.text
+    except genai.APIError as e:
+        raise RuntimeError(f"Gemini API Error: {str(e)}") from e
+    except Exception as e:
+        raise RuntimeError(f"An unexpected error occurred during Gemini processing: {str(e)}") from e
 
-    return result_text, None
+    return result_text, result_image  # May return text error or image path/None
 
 @app.route("/")
 def index():
-    # Render the front-end HTML (which contains complete HTML/CSS/JS inline)
     return render_template("index.html")
 
 @app.route("/process", methods=["POST"])
 def process():
     try:
-        # Expect JSON with keys "image" (base64 data URL) and "objectType"
         data = request.get_json(force=True)
         image_data = data.get("image")
         object_type = data.get("objectType", "").strip()
         if not image_data or not object_type:
             return jsonify({"success": False, "message": "Missing image data or object type."}), 400
 
-        # Generate output using Gemini
+        # Generate output using Gemini (now with text analysis first)
        result_text, result_image = generate_gemini_output(object_type, image_data)
-        if not result_image:
-            return jsonify({"success": False, "message": result_text or "Failed to generate image."}), 500
 
-        # Create a URL to serve the image from the static folder.
-        image_url = f"/static/{os.path.basename(result_image)}"
+        if result_text and not result_image:  # Text result means error or text response
+            return jsonify({"success": False, "message": result_text}), 400  # Send back text error
 
-        return jsonify({"success": True, "resultPath": image_url, "resultText": result_text})
+        if not result_image:  # Still check for image failure if no text error
+            return jsonify({"success": False, "message": "Failed to generate image. The object may be too large or complex, or the image may not be suitable."}), 500
+
+        image_url = f"/static/{os.path.basename(result_image)}"
+        return jsonify({"success": True, "resultPath": image_url, "resultText": result_text})  # resultText might be None or text from image model
     except Exception as e:
         return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500
 
 if __name__ == "__main__":
-    # Run the app on port 5000 or the port provided by the environment (for Hugging Face Spaces)
     app.run(host="0.0.0.0", port=7860)
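
For reference, a minimal client-side sketch of how the updated /process endpoint could be exercised once the Space is running. It assumes the app is reachable at http://localhost:7860 and that a local file named sample.png exists; the host, file name, and object type are hypothetical, and the requests library is an extra dependency used only for this example.

import base64
import mimetypes

import requests

# Hypothetical input file; any small JPG or PNG works.
image_path = "sample.png"
mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
with open(image_path, "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")

payload = {
    # process() expects a base64 data URL under "image" and the object name under "objectType".
    "image": f"data:{mime_type};base64,{encoded}",
    "objectType": "lamp post",
}
resp = requests.post("http://localhost:7860/process", json=payload)
data = resp.json()
if data.get("success"):
    print("Result image served at:", data["resultPath"])
else:
    print("Request failed:", data.get("message"))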