# Spaces: Paused  (stray page-header text, kept as a comment so the file parses)
import base64
import io

import google.generativeai as genai
import gradio as gr
import requests
from PIL import Image
# Popular visual styles; used as the choices of the "Style" dropdown in the UI.
STYLES = [
    "Photorealistic",
    "Oil Painting",
    "Watercolor",
    "Anime",
    "Studio Ghibli",
    "Black and White",
    "Polaroid",
    "Sketch",
    "3D Render",
    "Pixel Art",
    "Cyberpunk",
    "Steampunk",
    "Art Nouveau",
    "Pop Art",
    "Minimalist",
]
# Default negative prompt: traits the generated image should avoid. It is the
# initial value of the "Negative Prompt" textbox in the UI.
DEFAULT_NEGATIVE_PROMPT = """
ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame,
extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature,
cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face
"""
def enhance_prompt(prompt, style):
    """Ask Gemini to rewrite ``prompt`` as a richer image-generation prompt.

    The ``genai`` client must already be configured with an API key before
    this is called.

    Args:
        prompt: The user's original prompt text.
        style: Name of the visual style to steer towards. May be empty or
            ``None``, in which case the model is told any style is fine.

    Returns:
        The enhanced prompt with a leading "Enhanced prompt:"-like label
        removed. A blank ``prompt`` is returned stripped (i.e. empty) without
        calling the model, and the stripped original prompt is returned when
        the model produces no usable text.
    """
    original = (prompt or "").strip()
    if not original:
        # Nothing to enhance; skip the model call entirely.
        return original

    # An unselected dropdown yields None; avoid sending the literal "None".
    style_name = style or "any"

    model = genai.GenerativeModel("gemini-2.0-flash-lite")
    enhanced_prompt_request = "\n".join(
        [
            "",
            "Task: Enhance the following prompt for image generation.",
            f"Style: {style_name}",
            f"Original prompt: '{original}'",
            "Instructions:",
            "1. Expand the prompt to be more detailed and vivid.",
            "2. Incorporate elements of the specified style.",
            "3. Maintain the original intent of the prompt.",
            "4. Provide ONLY the enhanced prompt, without any explanations or options.",
            "5. Keep the enhanced prompt concise, ideally under 100 words.",
            "Enhanced prompt:",
            "",
        ]
    )
    response = model.generate_content(enhanced_prompt_request)

    try:
        enhanced_prompt = response.text.strip()
    except ValueError:
        # NOTE(review): the SDK's `.text` accessor is documented to raise
        # ValueError when the reply has no text part (e.g. a blocked
        # response) - confirm against the installed SDK version.
        return original

    # The model sometimes echoes a label before the actual prompt; drop it.
    prefixes_to_remove = (
        "Enhanced prompt:",
        "Here's the enhanced prompt:",
        "The enhanced prompt is:",
    )
    for prefix in prefixes_to_remove:
        if enhanced_prompt.lower().startswith(prefix.lower()):
            enhanced_prompt = enhanced_prompt[len(prefix):].strip()

    # Never hand an empty prompt to the image model.
    return enhanced_prompt or original
def generate_image(stability_api_key, enhanced_prompt, style, negative_prompt):
    """Generate one image with Stability AI's SD3 endpoint.

    Args:
        stability_api_key: Stability AI API key, sent as a Bearer token.
        enhanced_prompt: Prompt text describing the desired image.
        style: Optional style name appended to the prompt as ", Style: <style>".
        negative_prompt: Optional text describing what to avoid in the image.

    Returns:
        On success, a ``data:image/png;base64,...`` URI string containing the
        generated image. On any failure, a string starting with
        ``"Image generation failed: "`` followed by the reason. The function
        reports errors through its return value rather than raising.
    """
    failure_prefix = "Image generation failed: "

    api_key = (stability_api_key or "").strip()
    if not api_key:
        return failure_prefix + "missing Stability AI API key"

    prompt_text = (enhanced_prompt or "").strip()
    if not prompt_text:
        return failure_prefix + "empty prompt"

    url = "https://api.stability.ai/v2beta/stable-image/generate/sd3"
    # No explicit Content-Type: requests must generate the multipart boundary.
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    # NOTE(review): per Stability's v2beta reference this endpoint takes
    # multipart/form-data fields (aspect_ratio/output_format rather than
    # width/height/steps/num_images) - confirm against the current API docs.
    fields = {
        "model": "sd3.5-large-turbo",
        "prompt": f"{prompt_text}, Style: {style}" if style else prompt_text,
        "aspect_ratio": "1:1",  # square output, matching the former 1024x1024
        "output_format": "png",  # must agree with the data URI built below
        "cfg_scale": 7.5,
    }
    negative = (negative_prompt or "").strip()
    if negative:
        # NOTE(review): confirm the turbo model honours negative_prompt.
        fields["negative_prompt"] = negative

    try:
        response = requests.post(
            url,
            headers=headers,
            files={"none": ""},  # dummy part forces multipart/form-data encoding
            data=fields,
            timeout=120,  # do not hang the UI forever on a stalled connection
        )
    except requests.RequestException as exc:
        return f"{failure_prefix}{exc}"

    if response.status_code != 200:
        return f"Image generation failed: {response.text}"

    try:
        image_data = response.json()["image"]
    except (ValueError, KeyError, TypeError):
        return failure_prefix + "unexpected response format"

    return f"data:image/png;base64,{image_data}"
def process_and_generate(google_api_key, stability_api_key, prompt, style, negative_prompt):
    """Gradio callback: enhance the prompt with Gemini, then render it.

    Args:
        google_api_key: API key used to configure the ``genai`` client.
        stability_api_key: API key forwarded to ``generate_image``.
        prompt: The user's original prompt.
        style: Selected style name (may be ``None`` if nothing is selected).
        negative_prompt: Text describing what the image should avoid.

    Returns:
        A ``(image, enhanced_prompt)`` tuple for the image and textbox
        outputs. ``image`` is a decoded PIL image rather than the raw data-URI
        string, because the image component interprets plain strings as file
        paths or URLs.

    Raises:
        gr.Error: If a required input is missing, prompt enhancement fails,
            image generation reports a failure, or the returned image cannot
            be decoded. Gradio shows the message to the user.
    """
    if not (google_api_key or "").strip():
        raise gr.Error("Please enter your Google AI API key.")
    if not (stability_api_key or "").strip():
        raise gr.Error("Please enter your Stability AI API key.")
    if not (prompt or "").strip():
        raise gr.Error("Please enter a prompt.")

    genai.configure(api_key=google_api_key.strip())

    try:
        enhanced_prompt = enhance_prompt(prompt, style)
    except Exception as exc:  # UI boundary: surface any SDK error to the user
        raise gr.Error(f"Prompt enhancement failed: {exc}") from exc

    result = generate_image(stability_api_key, enhanced_prompt, style, negative_prompt)

    # generate_image signals failure with a plain message instead of a data URI.
    marker = "base64,"
    if not result.startswith("data:") or marker not in result:
        raise gr.Error(result)

    encoded = result.split(marker, 1)[1]
    try:
        image = Image.open(io.BytesIO(base64.b64decode(encoded)))
        image.load()  # force decoding now so corrupt data fails here
    except (ValueError, OSError) as exc:
        raise gr.Error(f"Could not decode the generated image: {exc}") from exc

    return image, enhanced_prompt
# Build the UI: API keys, prompt and options in the first column, the
# generated image and the enhanced prompt in the second.
with gr.Blocks() as demo:
    gr.Markdown("# Stability AI Image Generator with Google Gemini Prompt Enhancement")
    with gr.Row():
        with gr.Column(scale=1):
            google_api_key = gr.Textbox(label="Google AI API Key", type="password")
            stability_api_key = gr.Textbox(label="Stability AI API Key", type="password")
            prompt = gr.Textbox(label="Prompt")
            # Preselect the first style so the callback never receives None.
            style = gr.Dropdown(label="Style", choices=STYLES, value=STYLES[0])
            negative_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT)
            generate_btn = gr.Button("Generate Image")
        with gr.Column(scale=1):
            image_output = gr.Image(label="Generated Image")
            enhanced_prompt_output = gr.Textbox(label="Enhanced Prompt")

    # Input order must match the parameter order of process_and_generate.
    generate_btn.click(
        process_and_generate,
        inputs=[google_api_key, stability_api_key, prompt, style, negative_prompt],
        outputs=[image_output, enhanced_prompt_output],
    )

demo.launch()