File size: 4,888 Bytes
fdd3761
 
8124057
 
c1ad0ba
37abfc2
2736052
 
 
bb3cc4e
38d816c
37abfc2
 
 
 
 
 
 
2736052
37abfc2
2736052
4f9120d
2736052
 
 
 
6e52c0e
2736052
6e52c0e
 
4895c45
37abfc2
6e52c0e
 
 
 
a218b7f
 
2736052
a218b7f
2736052
 
6e52c0e
 
 
 
4f9120d
2736052
6e52c0e
 
37abfc2
6e52c0e
 
 
 
0a10f4f
37abfc2
 
 
a218b7f
d3a3bf1
 
37abfc2
6e52c0e
 
 
c67583f
129d6d6
 
d3a3bf1
c67583f
bb3cc4e
c67583f
129d6d6
 
 
 
c67583f
c6e6083
2736052
d3a3bf1
6e52c0e
 
 
c67583f
52b4e6d
129d6d6
 
c67583f
 
 
 
6e52c0e
c67583f
 
 
 
 
 
 
 
 
 
 
 
 
37abfc2
129d6d6
38d816c
d3a3bf1
52b4e6d
2736052
8124057
37abfc2
52b4e6d
37abfc2
8124057
2736052
38d816c
8124057
38d816c
37abfc2
38d816c
fdd3761
38d816c
8124057
38d816c
fdd3761
37abfc2
fdd3761
 
 
 
37abfc2
2736052
38d816c
 
 
a218b7f
2736052
38d816c
8124057
37abfc2
38d816c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# app.py

import os

import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel
from diffusers import StableDiffusionXLControlNetImg2ImgPipeline
from PIL import Image

from utils.planner import extract_scene_plan, generate_prompt_variations_from_scene  # 🧠 Brain Layer

# ----------------------------
# 🔧 Device Setup
# ----------------------------
# Half precision is only used on GPU; on CPU the models stay in float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# ----------------------------
# 📦 Load ControlNet + SDXL Model
# ----------------------------
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=dtype,
)

# Both checkpoints are SDXL, so the SDXL pipeline class is required; the
# SD 1.x ControlNet img2img class does not match the SDXL component layout.
pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=dtype,
    # Only request the fp16 weight files when we actually run in fp16.
    variant="fp16" if dtype == torch.float16 else None,
).to(device)

if device == "cuda":
    # xformers is an optional speed/memory optimisation: if it is missing or
    # incompatible, keep the default attention instead of crashing at startup.
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception as exc:  # best-effort optimisation at a top-level boundary
        print("⚠️ xformers attention unavailable, using default attention:", exc)
# NOTE: no model CPU offload on the CPU-only path. Offloading moves modules
# between an accelerator and host memory, so it needs a GPU to be meaningful;
# on CPU the pipeline already lives in host memory after `.to(device)`.

# ----------------------------
# 🖼 Canny Edge Generator
# ----------------------------
def generate_canny_map(
    image: Image.Image,
    low_threshold: int = 100,
    high_threshold: int = 200,
    size: tuple = (1024, 1024),
) -> Image.Image:
    """Return a Canny edge map of *image* as an RGB PIL image.

    The input is resized to *size*, converted to RGB, reduced to grayscale
    and passed through ``cv2.Canny``. The single-channel edge mask is then
    converted back to RGB.

    Args:
        image: Source picture.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.
        size: ``(width, height)`` the image is resized to before edge
            detection. The default keeps the original 1024x1024 behaviour.

    Returns:
        The edge map as an RGB ``PIL.Image.Image`` of dimensions *size*.

    Raises:
        ValueError: If *image* is ``None``.
    """
    print("🔍 Generating Canny map...")

    if image is None:
        raise ValueError("🚫 No image passed to Canny generator")

    resized = image.resize(size).convert("RGB")
    gray = cv2.cvtColor(np.array(resized), cv2.COLOR_RGB2GRAY)

    # cv2.Canny either returns an array or raises, so no None check is needed.
    edges = cv2.Canny(gray, low_threshold, high_threshold)

    return Image.fromarray(edges).convert("RGB")

# ----------------------------
# 🎨 Image Generation Function
# ----------------------------
def process_image(prompt, image, num_variations):
    """Generate stylized variations of an uploaded image guided by its edges.

    Steps: extract a scene plan from the prompt, expand it into several
    enriched prompts, build a Canny edge map of the (resized) upload, then
    run the ControlNet img2img pipeline once per enriched prompt.

    Args:
        prompt: User text describing the desired result.
        image: Uploaded picture as a PIL image, or ``None`` when nothing was
            uploaded.
        num_variations: Number of prompt variations to request. Coerced to
            ``int`` because UI sliders may deliver the value as a float.

    Returns:
        A 3-tuple ``(images, scene_plan, canny_map)``:

        * ``images`` - list of PIL images, one per enriched prompt. A prompt
          whose generation fails contributes a red 512x512 placeholder.
        * ``scene_plan`` - whatever ``extract_scene_plan`` returned.
        * ``canny_map`` - the RGB edge map used as the control image.

        If anything outside the per-image loop fails, returns
        ``([], {"error": <message>}, None)`` instead of raising.
    """
    try:
        print("🧠 Prompt received:", prompt)

        if image is None:
            raise ValueError("🚫 Uploaded image is missing or invalid.")

        num_variations = int(num_variations)

        # Step 1: Brain Layer (Scene Plan)
        scene_plan = extract_scene_plan(prompt)
        print("🧠 Scene plan extracted:", scene_plan)

        # Step 2: Generate prompt variations from the scene plan
        prompt_list = generate_prompt_variations_from_scene(scene_plan, prompt, num_variations)

        print("🧠 Enriched Prompts:")
        for i, p in enumerate(prompt_list):
            print(f"  {i+1}: {p}")

        # Step 3: Resize image + generate canny map (1024x1024 for SDXL).
        # generate_canny_map raises on failure, so its result is always usable.
        image = image.resize((1024, 1024)).convert("RGB")
        canny_map = generate_canny_map(image)

        # Step 4: Generate images
        outputs = []
        for i, enriched_prompt in enumerate(prompt_list):
            print(f"🎨 Generating image {i+1} with enriched prompt")
            try:
                # The diffusers ControlNet img2img pipelines take the
                # conditioning picture as `control_image`; any other keyword
                # raises TypeError and every variation would fail.
                result = pipe(
                    prompt=enriched_prompt,
                    image=image,
                    control_image=canny_map,
                    num_inference_steps=40,
                    strength=0.5,
                    guidance_scale=7.5,
                )

                if result is None or not hasattr(result, "images") or not result.images:
                    raise ValueError("⚠️ No image returned from pipeline")

                outputs.append(result.images[0])

            except Exception as inner:
                # Best effort: one failed variation must not discard the
                # others, so a red placeholder marks the failed slot.
                print(f"❌ Failed to generate image {i+1}:", inner)
                outputs.append(Image.new("RGB", (512, 512), color="red"))

        return outputs, scene_plan, canny_map

    except Exception as e:
        print("❌ Generation failed:", e)
        # The first output feeds an image gallery, which cannot render a
        # plain text entry; return an empty gallery and report the error
        # through the JSON output instead.
        return [], {"error": str(e)}, None

# ----------------------------
# 🖼 Gradio UI
# ----------------------------
# Two-column layout: inputs (prompt, image, variation count, button) on the
# left; results (gallery, scene-plan JSON, Canny preview) on the right.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 NewCrux AI — SDXL + Canny Inference\nUpload a product image, enter a prompt, and generate stylized scenes while preserving structure.")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt")
            # type="pil" makes the handler receive a PIL image (or None).
            image_input = gr.Image(type="pil", label="Upload Product Image")
            variation_slider = gr.Slider(1, 4, step=1, value=1, label="Number of Variations")
            generate_btn = gr.Button("Generate")

        with gr.Column():
            output_gallery = gr.Gallery(
                label="Generated Variations",
                columns=2,
                rows=2,
                height="auto"
            )
            json_output = gr.JSON(label="🧠 Brain Layer Reasoning")
            canny_preview = gr.Image(label="🔍 Canny Edge Preview")

    # The handler's 3-tuple return maps positionally onto the three outputs.
    generate_btn.click(
        fn=process_image,
        inputs=[prompt_input, image_input, variation_slider],
        outputs=[output_gallery, json_output, canny_preview]
    )

demo.launch()