Spaces:

Athspi
/

Gttg

Sleeping

File size: 5,435 Bytes

e5c238d
a36d15c
8e6ca2b
a36d15c
 
8e6ca2b
 
 
a36d15c
e5c238d
 
c90c576
9479bea
8e6ca2b
 
a36d15c
 
 
 
 
 
 
 
9479bea
 
a36d15c
c90c576
8e6ca2b
a36d15c
 
 
 
 
 
 
 
 
9479bea
a36d15c
 
 
 
 
 
8e6ca2b
 
a36d15c
8e6ca2b
 
 
a36d15c
9479bea
a36d15c
8e6ca2b
 
68780eb
8e6ca2b
68780eb
 
 
 
 
 
c90c576
 
 
 
68780eb
e5c238d
8e6ca2b
d2236a2
9479bea
8e6ca2b
 
68780eb
 
 
8e6ca2b
 
 
c90c576
8e6ca2b
 
c90c576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e6ca2b
a36d15c
68780eb
9479bea
a36d15c
 
 
 
 
9479bea
a36d15c
 
 
 
 
 
9479bea
a36d15c
 
c90c576
a36d15c
 
 
 
 
 
 
8e6ca2b
 
9479bea
fea0355

import base64
import mimetypes
import os
import tempfile
import uuid

from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from google import genai
from google.genai import types

# Flask application object used by the route decorators below.
app = Flask(__name__)

# Gemini client. The key lookup is a plain index into os.environ, so a missing
# GEMINI_API_KEY raises KeyError as soon as this module is imported.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
client = genai.Client(api_key=GEMINI_API_KEY)

# Working directories (relative to the current working directory):
#   UPLOAD_FOLDER - where incoming images are written before upload to Gemini
#   RESULT_FOLDER - where generated images are written
UPLOAD_FOLDER = 'uploads'
RESULT_FOLDER = 'static'
for _folder in (UPLOAD_FOLDER, RESULT_FOLDER):
    os.makedirs(_folder, exist_ok=True)

def _decode_image_data_url(image_data_url):
    """Split a base64 data URL into its raw bytes and a file extension.

    Returns:
        A ``(binary_data, ext)`` tuple. ``ext`` is ``".png"`` when the data
        URL header mentions "png" and ``".jpg"`` otherwise.

    Raises:
        ValueError: if the string has no comma separating header and payload,
            or if the payload is not valid base64.
    """
    try:
        header, encoded = image_data_url.split(',', 1)
        # binascii.Error (raised on bad padding) is a ValueError subclass.
        binary_data = base64.b64decode(encoded)
    except ValueError as err:
        raise ValueError("Invalid image data") from err
    ext = ".png" if "png" in header.lower() else ".jpg"
    return binary_data, ext


def _upload_image_bytes(binary_data, ext):
    """Upload image bytes to Gemini via a uniquely named temporary file.

    A unique name keeps concurrent requests from overwriting each other's
    upload; the local copy is removed once the upload call has returned.
    """
    tmp = tempfile.NamedTemporaryFile(dir=UPLOAD_FOLDER, suffix=ext, delete=False)
    try:
        with tmp:
            tmp.write(binary_data)
        # The file is closed before uploading so it can be reopened by path.
        return client.files.upload(file=tmp.name)
    finally:
        try:
            os.remove(tmp.name)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not mask the
            # upload result or the upload error.
            pass


def _iter_stream_parts(stream):
    """Yield every content part from a streamed response, skipping empty chunks."""
    for chunk in stream:
        if not chunk.candidates:
            continue
        content = chunk.candidates[0].content
        if not content or not content.parts:
            continue
        yield from content.parts


def generate_gemini_output(object_type, image_data_url):
    """
    Ask Gemini to remove ``object_type`` from the supplied image.

    The request is first sent to a text model. When that model's reply
    contains "no", the same request is re-sent to the image generation model
    and the first image it returns is written to RESULT_FOLDER.

    Args:
        object_type: Description of the object to remove; inserted into the
            prompt "Remove {object_type} from the image". Skipped when falsy.
        image_data_url: The image as a base64 data URL. Skipped when falsy.

    Returns:
        ``(result_text, result_image)``. ``result_text`` is the full text
        reply of the first model (``None`` if it produced no text).
        ``result_image`` is the path of the saved image, including the
        RESULT_FOLDER prefix, or ``None`` when no image was generated.

    Raises:
        ValueError: if ``image_data_url`` is not a decodable data URL.
    """
    model = "gemini-2.0-flash-lite"  # Use the lite model for text-based responses

    # Prepare content parts for Gemini: the uploaded image first, then the prompt.
    parts = []
    if image_data_url:
        binary_data, ext = _decode_image_data_url(image_data_url)
        uploaded_file = _upload_image_bytes(binary_data, ext)
        parts.append(
            types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type)
        )
    if object_type:
        # Gemini magic prompt: instruct the model to remove the specified object
        magic_prompt = f"Remove {object_type} from the image"
        parts.append(types.Part.from_text(text=magic_prompt))

    contents = [types.Content(role="user", parts=parts)]

    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_mime_type="text/plain",
        system_instruction=[
            types.Part.from_text(text="""Your AI finds user requests about removing objects from images.
If the user asks to remove a person or animal, respond with 'No'."""),
        ],
    )

    # A streamed reply arrives in fragments; join them so the caller sees the
    # whole answer rather than only the last fragment.
    text_fragments = [
        part.text
        for part in _iter_stream_parts(
            client.models.generate_content_stream(
                model=model,
                contents=contents,
                config=generate_content_config,
            )
        )
        if part.text
    ]
    result_text = "".join(text_fragments) or None

    # If the response is "No", switch to the image generation model.
    # NOTE(review): this is a substring test, so replies such as "cannot" or
    # "I do not know" also match, and the system instruction above asks for
    # 'No' on person/animal requests - confirm this gate is not inverted.
    if result_text and "no" in result_text.lower():
        model = "gemini-2.0-flash-exp-image-generation"
        generate_content_config.response_modalities = ["image", "text"]
        image_stream = client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        for part in _iter_stream_parts(image_stream):
            inline = part.inline_data
            if not inline or not inline.data:
                continue
            file_extension = mimetypes.guess_extension(inline.mime_type) or ".png"
            # A unique name per result keeps concurrent requests from
            # overwriting each other and avoids stale browser-cached images.
            # Old results are not pruned here.
            output_filename = secure_filename(
                f"generated_output_{uuid.uuid4().hex}{file_extension}"
            )
            result_image_path = os.path.join(RESULT_FOLDER, output_filename)
            with open(result_image_path, "wb") as f:
                f.write(inline.data)
            return result_text, result_image_path

    return result_text, None

@app.route("/")
def index():
    """Serve the front-end page for the root URL."""
    # The index.html template is expected to carry its HTML/CSS/JS inline.
    page = render_template("index.html")
    return page

@app.route("/process", methods=["POST"])
def process():
    """Handle an object-removal request.

    Expects a JSON object with the keys "image" (base64 data URL) and
    "objectType" (text naming the object to remove).

    Returns:
        A JSON response with a "success" flag. On success it also carries
        "resultPath" (URL of the generated image under /static/) and
        "resultText". Responds 400 when the body is not a JSON object or
        either field is missing, empty or not a string, and 500 when no image
        was produced or an unexpected error occurred.
    """
    missing_fields = {"success": False, "message": "Missing image data or object type."}

    # silent=True turns an unparsable body into None instead of raising, so
    # malformed JSON is reported as a client error rather than a 500.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify(missing_fields), 400

    image_data = data.get("image")
    object_type = data.get("objectType")
    # Reject non-string values (null, numbers, lists) before calling .strip().
    if not isinstance(image_data, str) or not isinstance(object_type, str):
        return jsonify(missing_fields), 400
    object_type = object_type.strip()
    if not image_data or not object_type:
        return jsonify(missing_fields), 400

    try:
        # Generate output using Gemini
        result_text, result_image = generate_gemini_output(object_type, image_data)
        if not result_image:
            return jsonify({"success": False, "message": result_text or "Failed to generate image."}), 500

        # Create a URL to serve the image from the static folder.
        image_url = f"/static/{os.path.basename(result_image)}"

        return jsonify({"success": True, "resultPath": image_url, "resultText": result_text})
    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report
        # the failure to the client in the existing response shape.
        app.logger.exception("Failed to process object removal request")
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500

if __name__ == "__main__":
    # Listen on all interfaces. The port is taken from the PORT environment
    # variable when it is set; otherwise it stays at 7860, the value that was
    # previously hard-coded here.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))