# app.py
import gradio as gr
from PIL import Image
import os
import torch
import numpy as np
import cv2
from diffusers import StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel
from utils.planner import extract_scene_plan, generate_prompt_variations_from_scene  # 🧠 Brain Layer
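# NOTE (assumption): utils/planner.py is not shown in this file. Judging by how its
# functions are called below, extract_scene_plan(prompt) is expected to return a dict
# describing the scene, and generate_prompt_variations_from_scene(scene_plan, prompt, n)
# is expected to return a list of n enriched prompt strings. A hypothetical stand-in,
# useful only for local testing if the module is missing, might look like:
#
#   def extract_scene_plan(prompt):
#       return {"subject": prompt, "setting": "studio", "lighting": "soft"}
#
#   def generate_prompt_variations_from_scene(scene_plan, prompt, n):
#       return [f"{prompt}, variation {i + 1}" for i in range(int(n))]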
# ----------------------------
# 🔧 Device Setup
# ----------------------------
device = "cpu"  # Using CPU for now
dtype = torch.float32
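# Optional (assumption): this Space pins CPU on purpose, but the same code runs on a GPU
# if one is available. A common pattern is to select device and dtype dynamically:
#
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   dtype = torch.float16 if device == "cuda" else torch.float32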
# ----------------------------
# Load ControlNet + SDXL Model (Corrected)
# ----------------------------
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",  # official Canny + SDXL ControlNet model
    torch_dtype=dtype
)
# Img2Img ControlNet pipeline: accepts an init image, a control image, and a strength value
pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=dtype
).to(device)
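# Optional (assumption): SDXL + ControlNet is memory-hungry; these stock diffusers helpers
# trade some speed for a lower peak footprint. Left commented out to keep the original
# behaviour unchanged:
#
#   pipe.enable_attention_slicing()  # compute attention in chunks
#   pipe.enable_vae_slicing()        # decode the VAE output in slices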
# ----------------------------
# Canny Edge Generator
# ----------------------------
def generate_canny_map(image: Image.Image) -> Image.Image:
    print("Generating Canny map...")
    if image is None:
        raise ValueError("🚫 No image passed to Canny generator")
    image = image.resize((1024, 1024)).convert("RGB")
    np_image = np.array(image)
    gray = cv2.cvtColor(np_image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    if edges is None:
        raise ValueError("🚫 OpenCV Canny failed to produce edge map")
    return Image.fromarray(edges).convert("RGB")
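# Hypothetical standalone check (assumes a local test photo at "sample.jpg"): handy for
# eyeballing whether the 100/200 Canny thresholds suit your product shots before running
# the full pipeline:
#
#   preview = generate_canny_map(Image.open("sample.jpg"))
#   preview.save("canny_preview.png")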
# ----------------------------
# 🎨 Image Generation Function
# ----------------------------
def process_image(prompt, image, num_variations):
    try:
        print("🧠 Prompt received:", prompt)
        if image is None:
            raise ValueError("🚫 Uploaded image is missing or invalid.")

        # Step 1: Extract scene plan
        scene_plan = extract_scene_plan(prompt)
        print("🧠 Scene plan extracted:", scene_plan)

        # Step 2: Generate enriched prompt variations
        prompt_list = generate_prompt_variations_from_scene(scene_plan, prompt, num_variations)
        print("🧠 Enriched Prompts:")
        for i, p in enumerate(prompt_list):
            print(f"  {i+1}: {p}")

        # Step 3: Prepare image and Canny edge
        image = image.resize((1024, 1024)).convert("RGB")
        canny_map = generate_canny_map(image)

        outputs = []
        for i, enriched_prompt in enumerate(prompt_list):
            print(f"🎨 Generating image {i+1}...")
            try:
                result = pipe(
                    prompt=enriched_prompt,
                    image=image,
                    control_image=canny_map,
                    num_inference_steps=30,
                    strength=0.5,
                    guidance_scale=7.5
                )
                outputs.append(result.images[0])
            except Exception as err:
                print(f"❌ Failed to generate image {i+1}:", err)
                outputs.append(Image.new("RGB", (512, 512), color="red"))

        return outputs, scene_plan, canny_map
    except Exception as e:
        print("❌ Generation failed:", e)
        # Return an empty gallery on failure; gr.Gallery cannot render a plain error string
        return [], {"error": str(e)}, None
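# Hypothetical smoke test for the generation function without the UI (assumes a local
# "sample.jpg"); on CPU each SDXL image can take several minutes:
#
#   images, plan, canny = process_image("studio photo of a sneaker", Image.open("sample.jpg"), 1)
#   images[0].save("out_0.png")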
# ----------------------------
# 🖼 Gradio UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 NewCrux AI - SDXL + Canny (CPU Mode)\nUpload a product image, enter a prompt, and generate enhanced visuals using ControlNet.")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt")
            image_input = gr.Image(type="pil", label="Upload Product Image")
            variation_slider = gr.Slider(1, 4, step=1, value=1, label="Number of Variations")
            generate_btn = gr.Button("Generate")
        with gr.Column():
            output_gallery = gr.Gallery(label="Generated Variations", columns=2, rows=2, height="auto")
            json_output = gr.JSON(label="🧠 Brain Layer Reasoning")
            canny_preview = gr.Image(label="Canny Edge Preview")

    generate_btn.click(
        fn=process_image,
        inputs=[prompt_input, image_input, variation_slider],
        outputs=[output_gallery, json_output, canny_preview]
    )

demo.launch()
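# Note (assumption): CPU-only SDXL runs can take many minutes per image; if requests time
# out in the browser, enabling Gradio's queue before launching is a common remedy:
#
#   demo.queue().launch()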