Spaces:
Sleeping
Sleeping
File size: 5,435 Bytes
e5c238d a36d15c 8e6ca2b a36d15c 8e6ca2b a36d15c e5c238d c90c576 9479bea 8e6ca2b a36d15c 9479bea a36d15c c90c576 8e6ca2b a36d15c 9479bea a36d15c 8e6ca2b a36d15c 8e6ca2b a36d15c 9479bea a36d15c 8e6ca2b 68780eb 8e6ca2b 68780eb c90c576 68780eb e5c238d 8e6ca2b d2236a2 9479bea 8e6ca2b 68780eb 8e6ca2b c90c576 8e6ca2b c90c576 8e6ca2b a36d15c 68780eb 9479bea a36d15c 9479bea a36d15c 9479bea a36d15c c90c576 a36d15c 8e6ca2b 9479bea fea0355 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import base64
import mimetypes
import os
import tempfile

from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from google import genai
from google.genai import types
# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# Gemini client. The key is read from the environment at import time, so a
# missing GEMINI_API_KEY stops the module from loading (KeyError).
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
client = genai.Client(api_key=GEMINI_API_KEY)

# Working directories: incoming images go to UPLOAD_FOLDER, generated results
# to RESULT_FOLDER. Both are created up front if they do not exist yet.
UPLOAD_FOLDER = 'uploads'
RESULT_FOLDER = 'static'
for _required_dir in (UPLOAD_FOLDER, RESULT_FOLDER):
    os.makedirs(_required_dir, exist_ok=True)
def _iter_stream_parts(stream):
    """Yield every content part of a streamed Gemini response, skipping empty chunks."""
    for chunk in stream:
        if not chunk.candidates:
            continue
        content = chunk.candidates[0].content
        if content and content.parts:
            yield from content.parts


def generate_gemini_output(object_type, image_data_url):
    """
    Ask Gemini to remove ``object_type`` from the supplied image.

    The image is decoded from its data URL, written to a temporary file in
    UPLOAD_FOLDER, and uploaded through the Gemini client. A text-only model
    is queried first; if its reply contains "no", the same request is sent to
    the image generation model and the first image it returns is saved in
    RESULT_FOLDER.

    Args:
        object_type: Description of the object to remove; inserted into the
            prompt text. Skipped when empty.
        image_data_url: The image as a data URL ("<header>,<base64 payload>").
            Skipped when empty.

    Returns:
        A tuple ``(result_text, result_image)``. ``result_text`` is the text
        returned by the first model (``None`` if it returned none).
        ``result_image`` is the path of the saved image, RESULT_FOLDER
        included, or ``None`` when no image was produced.

    Raises:
        ValueError: If ``image_data_url`` has no comma separator or its
            payload cannot be base64-decoded.
    """
    text_model = "gemini-2.0-flash-lite"  # Use the lite model for text-based responses
    image_model = "gemini-2.0-flash-exp-image-generation"
    files = []

    # Decode the image data from the data URL
    if image_data_url:
        try:
            header, encoded = image_data_url.split(',', 1)
            binary_data = base64.b64decode(encoded)
        except ValueError:
            # Covers both a missing comma and bad base64 (binascii.Error is a
            # ValueError subclass).
            raise ValueError("Invalid image data") from None

        # Determine file extension from header
        ext = ".png" if "png" in header.lower() else ".jpg"

        # A unique file per call: a fixed name would let concurrent requests
        # overwrite each other's upload before it is sent.
        fd, temp_filepath = tempfile.mkstemp(
            prefix="temp_image_", suffix=ext, dir=UPLOAD_FOLDER
        )
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(binary_data)
            # Upload file to Gemini
            files.append(client.files.upload(file=temp_filepath))
        finally:
            # The local copy is only needed for the upload call itself.
            os.remove(temp_filepath)

    # Prepare content parts for Gemini
    parts = []
    if files:
        parts.append(types.Part.from_uri(file_uri=files[0].uri, mime_type=files[0].mime_type))
    if object_type:
        # Gemini magic prompt: instruct the model to remove the specified object
        magic_prompt = f"Remove {object_type} from the image"
        parts.append(types.Part.from_text(text=magic_prompt))

    contents = [types.Content(role="user", parts=parts)]

    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_mime_type="text/plain",
        system_instruction=[
            types.Part.from_text(text="""Your AI finds user requests about removing objects from images.
If the user asks to remove a person or animal, respond with 'No'."""),
        ],
    )

    # Collect the whole streamed reply; text arrives in fragments, so keeping
    # only the latest fragment would lose the beginning of the answer.
    text_fragments = [
        part.text
        for part in _iter_stream_parts(
            client.models.generate_content_stream(
                model=text_model,
                contents=contents,
                config=generate_content_config,
            )
        )
        if part.text
    ]
    result_text = "".join(text_fragments) or None

    # If the response is "No", switch to the image generation model.
    # NOTE(review): this is a substring test, so replies containing e.g.
    # "cannot" or "know" also match. The system instruction asks for 'No' on
    # person/animal requests, yet this branch is the one that generates the
    # image - confirm the intended gating before changing it.
    if not (result_text and "no" in result_text.lower()):
        return result_text, None

    generate_content_config.response_modalities = ["image", "text"]
    image_stream = client.models.generate_content_stream(
        model=image_model,
        contents=contents,
        config=generate_content_config,
    )
    # Check every part of every chunk: the image is not guaranteed to be the
    # first part when text and image are returned together.
    for part in _iter_stream_parts(image_stream):
        if not part.inline_data:
            continue
        file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png"
        # Fixed output name: each call overwrites the previous result file.
        output_filename = secure_filename("generated_output" + file_extension)
        result_image_path = os.path.join(RESULT_FOLDER, output_filename)
        with open(result_image_path, "wb") as f:
            f.write(part.inline_data.data)
        # Path includes RESULT_FOLDER; callers take the basename to build a URL.
        return result_text, result_image_path

    return result_text, None
@app.route("/")
def index():
    """Serve the front-end page by rendering the ``index.html`` template."""
    # The template carries the complete HTML/CSS/JS inline.
    page = render_template("index.html")
    return page
@app.route("/process", methods=["POST"])
def process():
    """
    Handle an object-removal request.

    Expects a JSON object with the keys "image" (base64 data URL) and
    "objectType" (text naming the object to remove).

    Returns:
        A JSON response and status code:
        - 400 when the body is not a JSON object or a field is missing,
          empty, or not a string;
        - 500 when no image was generated or an exception occurred;
        - 200 with "resultPath" (URL under /static/) and "resultText"
          on success.
    """
    try:
        # silent=True turns malformed JSON into None so it is reported as a
        # client error below instead of falling into the generic 500 handler.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "Request body must be a JSON object."}), 400

        image_data = data.get("image")
        object_type = data.get("objectType", "")
        # Non-string values (including an explicit null) are treated as
        # missing rather than crashing on .strip().
        if not isinstance(image_data, str) or not isinstance(object_type, str):
            return jsonify({"success": False, "message": "Missing image data or object type."}), 400
        object_type = object_type.strip()
        if not image_data or not object_type:
            return jsonify({"success": False, "message": "Missing image data or object type."}), 400

        # Generate output using Gemini
        result_text, result_image = generate_gemini_output(object_type, image_data)
        if not result_image:
            return jsonify({"success": False, "message": result_text or "Failed to generate image."}), 500

        # Create a URL to serve the image from the static folder.
        image_url = f"/static/{os.path.basename(result_image)}"
        return jsonify({"success": True, "resultPath": image_url, "resultText": result_text})
    except Exception as e:
        # Top-level boundary: keep the JSON error shape for the client, but
        # record the traceback so the failure is not lost.
        app.logger.exception("Processing request failed")
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500
if __name__ == "__main__":
    # Listen on all interfaces. The port comes from the PORT environment
    # variable when it is set; otherwise 7860, the value this script
    # previously hard-coded, is used.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))