# app.py
import gradio as gr
from PIL import Image
import os
import torch
import numpy as np
import cv2
from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel
from utils.planner import extract_scene_plan  # Brain Layer (prompt -> structured scene plan)
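# Assumption: utils/planner.py provides extract_scene_plan(prompt) and returns a
# JSON-serializable dict; its output is rendered verbatim in the gr.JSON component below.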
# ----------------------------
# Device Setup
# ----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
# ----------------------------
# Load ControlNet + Stable Diffusion Pipeline
# ----------------------------
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_canny",
    torch_dtype=dtype
)

pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # for SDXL ("stabilityai/stable-diffusion-xl-base-1.0") you would also need an SDXL ControlNet and the XL pipeline class
    controlnet=controlnet,
    torch_dtype=dtype
).to(device)

# xformers attention is an optional speed/memory optimization; skip it gracefully if unavailable
if device == "cuda":
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception as e:
        print("xformers not enabled:", e)
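# Optional memory savers (sketch; both are standard diffusers calls, whether they are
# needed depends on the GPU this Space runs on):
#   pipe.enable_attention_slicing()     # lower peak VRAM at a small speed cost
#   pipe.enable_model_cpu_offload()     # requires `accelerate`; use instead of .to(device)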
# ----------------------------
# Canny Edge Generator
# ----------------------------
def generate_canny_map(image: Image.Image) -> Image.Image:
    """Resize to 512x512, convert to grayscale, and return the Canny edge map as an RGB image."""
    print("Generating Canny map...")
    image = image.resize((512, 512)).convert("RGB")
    np_image = np.array(image)
    np_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(np_image, 100, 200)  # standard low/high thresholds
    canny_image = Image.fromarray(edges).convert("RGB")
    return canny_image
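# Quick local check (assumption: a sample image exists at "test.jpg"):
#   generate_canny_map(Image.open("test.jpg")).save("canny_preview.png")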
# ----------------------------
# Image Generation Function
# ----------------------------
def process_image(prompt, image, num_variations):
    try:
        print("Prompt received:", prompt)

        # Step 1: Brain Layer - turn the free-form prompt into a structured scene plan
        reasoning_json = extract_scene_plan(prompt)
        print("Scene plan extracted:", reasoning_json)

        # Step 2: Prepare inputs
        image = image.resize((512, 512)).convert("RGB")
        canny_map = generate_canny_map(image)

        # Step 3: Generate the requested number of variations
        outputs = []
        for i in range(int(num_variations)):  # slider values may arrive as floats
            print(f"Generating variation {i + 1}")
            result = pipe(
                prompt=prompt,
                image=image,
                control_image=canny_map,
                num_inference_steps=40,
                strength=0.9,
                guidance_scale=7.5
            )
            outputs.append(result.images[0])

        return outputs, reasoning_json, canny_map

    except Exception as e:
        print("Generation failed:", e)
        # Return an empty gallery rather than a string, which gr.Gallery cannot render
        return [], {"error": str(e)}, None
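# Note: each pipe() call above draws a fresh random seed, so the variations differ on their own.
# For reproducible variations one could pass an explicit generator per iteration
# (sketch only, not wired into the UI):
#   generator = torch.Generator(device=device).manual_seed(1000 + i)
#   result = pipe(..., generator=generator)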
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "## NewCrux AI: Stable Diffusion + ControlNet (Canny) Inference\n"
        "Upload a product image, enter a prompt, and generate stylized scenes while preserving structure."
    )

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt")
            image_input = gr.Image(type="pil", label="Upload Product Image")
            variation_slider = gr.Slider(1, 4, step=1, value=1, label="Number of Variations")
            generate_btn = gr.Button("Generate")
        with gr.Column():
            output_gallery = gr.Gallery(
                label="Generated Variations",
                columns=2,
                rows=2,
                height="auto"
            )
            json_output = gr.JSON(label="Brain Layer Reasoning")
            canny_preview = gr.Image(label="Canny Edge Preview")

    generate_btn.click(
        fn=process_image,
        inputs=[prompt_input, image_input, variation_slider],
        outputs=[output_gallery, json_output, canny_preview]
    )

demo.launch()
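# Note: on Spaces, launch() alone is enough; if several users may hit the app at once,
# calling demo.queue() before demo.launch() (a standard Gradio option) serializes requests.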