merge controlnet
- .env.example +1 -1
- .gitignore +2 -1
- Project.md +35 -2
- api.py +37 -21
- api_example.py +10 -1
- app.py +52 -21
- config.py +7 -0
- controlnet_pipeline.py +90 -0
- main.py +7 -2
- requirements.txt +6 -0
- spaces_config.json +9 -1
.env.example
CHANGED
@@ -1,5 +1,5 @@
 # Hugging Face token
-HF_TOKEN=
+HF_TOKEN=your_token_here
 
 # API settings
 API_HOST=0.0.0.0
.gitignore
CHANGED
@@ -1,4 +1,5 @@
 .venv
 *.pyc
 __pycache__
-.env
+.env
+*.env
Project.md
CHANGED
@@ -1,11 +1,12 @@
 # Diffusion Models App
 
-A Python application that uses Hugging Face inference endpoints for text-to-image and image-to-image generation with a Gradio UI and API endpoints.
+A Python application that uses Hugging Face inference endpoints and on-device models for text-to-image and image-to-image generation with a Gradio UI and API endpoints.
 
 ## Features
 
 - Text-to-image generation
 - Image-to-image transformation with optional prompt
+- ControlNet depth-based image transformation
 - Gradio UI for interactive use
 - API endpoints for integration with other applications
 - Configurable models via text input
@@ -17,16 +18,26 @@
 - `app.py` - Gradio UI implementation
 - `api.py` - FastAPI server for API endpoints
 - `inference.py` - Core functionality for HF inference
+- `controlnet_pipeline.py` - ControlNet depth model pipeline
 - `config.py` - Configuration and settings
 - `requirements.txt` - Dependencies
 
 ## Setup & Usage
 
+### Local Development
 1. Clone the repository
-2. Create a
+2. Create a `.env` file with your Hugging Face token (copy from `.env.example`)
 3. Install dependencies: `pip install -r requirements.txt`
 4. Run the application: `python main.py`
 
+### Hugging Face Spaces Deployment
+1. Never commit the `.env` file with your token to the repository!
+2. Instead, add your HF_TOKEN as a secret in the Spaces UI:
+   - Go to your Space's Settings tab
+   - Navigate to Repository Secrets
+   - Add a secret named `HF_TOKEN` with your token as the value
+3. The application will automatically use this secret in the Spaces environment
+
 ## Running Options
 
 - Run both UI and API: `python main.py`
@@ -47,6 +58,28 @@ The application includes defaults for:
 
 These defaults are applied to both the Gradio UI and API endpoints for consistency.
 
+## ControlNet Implementation
+
+The application now supports running a ControlNet depth model directly on the Hugging Face Spaces GPU using the `spaces.GPU` decorator. This feature allows for:
+
+1. **On-device processing**: Instead of relying solely on remote inference endpoints, the app can now perform image transformations using the local GPU.
+
+2. **Depth-based transformations**: The ControlNet implementation extracts depth information from the input image, allowing for more structure-preserving transformations.
+
+3. **Integration with existing workflow**: The ControlNet option is seamlessly integrated into the image-to-image tab via a simple checkbox.
+
+### How it works:
+
+1. When a user uploads an image and enables the ControlNet option, the app processes the image through a depth estimator.
+2. The depth map is then used by the ControlNet model to guide the image generation process.
+3. The `spaces.GPU` decorator ensures that these operations run on the GPU for optimal performance.
+4. The resulting image maintains the spatial structure of the original while applying the creative transformation specified in the prompt.
+
+The implementation uses:
+- `stable-diffusion-v1-5` as the base model
+- `lllyasviel/sd-controlnet-depth` as the ControlNet model
+- The HuggingFace Transformers depth estimation pipeline
+
 ## Environment Variables
 
 - `HF_TOKEN` - Your Hugging Face API token
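The depth-conditioning flow described in the new README section can be sketched in a few lines of plain transformers/diffusers code. This is a minimal illustration mirroring the `controlnet_pipeline.py` file added in this commit; the input file name and prompt are placeholders:

```python
import numpy as np
import torch
from PIL import Image
from transformers import pipeline
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel

# 1. Estimate a depth map for the uploaded image
depth_estimator = pipeline("depth-estimation")
depth = depth_estimator(Image.open("input.png"))["depth"]  # "input.png" is a placeholder

# 2. Stack the single-channel depth map into a 3-channel conditioning image
depth_array = np.array(depth)[:, :, None]
depth_image = Image.fromarray(np.concatenate([depth_array] * 3, axis=2))

# 3. Let the depth map guide generation through ControlNet
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

result = pipe(
    "A futuristic cityscape",  # placeholder prompt
    image=depth_image,
    num_inference_steps=20,
).images[0]
result.save("controlnet_output.png")
```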
api.py
CHANGED
@@ -5,12 +5,16 @@ import io
 import uvicorn
 import config
 from inference import DiffusionInference
+from controlnet_pipeline import ControlNetPipeline
 
 app = FastAPI(title="Diffusion Models API")
 
 # Initialize the inference class
 inference = DiffusionInference()
 
+# Initialize the ControlNet pipeline
+controlnet = ControlNetPipeline()
+
 @app.get("/")
 async def root():
     return {"message": "Diffusion Models API is running"}
@@ -58,6 +62,7 @@ async def image_to_image(
     image: UploadFile = File(...),
     prompt: str = Form(config.DEFAULT_IMG2IMG_PROMPT),
     model: str = Form(config.DEFAULT_IMG2IMG_MODEL),
+    use_controlnet: bool = Form(False),
     negative_prompt: str = Form(config.DEFAULT_NEGATIVE_PROMPT),
     guidance_scale: float = Form(7.5),
     num_inference_steps: int = Form(50)
@@ -70,27 +75,38 @@
     contents = await image.read()
     input_image = Image.open(io.BytesIO(contents))
 
-    # Use default model if not specified or empty
-    if not model or model.strip() == '':
-        model = config.DEFAULT_IMG2IMG_MODEL
-
-    # Use default prompt if not specified or empty
-    if not prompt or prompt.strip() == '':
-        prompt = config.DEFAULT_IMG2IMG_PROMPT
-
-    # Use default negative prompt if not specified or empty
-    if not negative_prompt or negative_prompt.strip() == '':
-        negative_prompt = config.DEFAULT_NEGATIVE_PROMPT
-
-    # Call the inference module
-    result = inference.image_to_image(
-        image=input_image,
-        prompt=prompt,
-        model_name=model,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps
-    )
+    # Use ControlNet if specified
+    if use_controlnet and config.USE_CONTROLNET:
+        # Process with ControlNet pipeline
+        result = controlnet.generate(
+            prompt=prompt,
+            image=input_image,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps
+        )
+    else:
+        # Use default model if not specified or empty
+        if not model or model.strip() == '':
+            model = config.DEFAULT_IMG2IMG_MODEL
+
+        # Use default prompt if not specified or empty
+        if not prompt or prompt.strip() == '':
+            prompt = config.DEFAULT_IMG2IMG_PROMPT
+
+        # Use default negative prompt if not specified or empty
+        if not negative_prompt or negative_prompt.strip() == '':
+            negative_prompt = config.DEFAULT_NEGATIVE_PROMPT
+
+        # Call the inference module
+        result = inference.image_to_image(
+            image=input_image,
+            prompt=prompt,
+            model_name=model,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps
+        )
 
     # Convert PIL image to bytes
     img_byte_arr = io.BytesIO()
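Calling the updated endpoint from another application only needs one extra form field. A minimal sketch with the `requests` library; the base URL, port, and the `/image-to-image` route name are assumptions (they are not shown in this diff), as is the response being returned as raw image bytes:

```python
import io

import requests
from PIL import Image

# Assumed endpoint location; adjust to wherever api.py is actually served.
API_URL = "http://localhost:8000/image-to-image"

with open("input.png", "rb") as f:  # placeholder input file
    response = requests.post(
        API_URL,
        files={"image": ("input.png", f, "image/png")},
        data={
            "prompt": "A futuristic cityscape",
            "use_controlnet": "True",  # new form field added by this commit
            "guidance_scale": "7.5",
            "num_inference_steps": "30",
        },
    )

response.raise_for_status()
Image.open(io.BytesIO(response.content)).save("controlnet_output.png")
```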
api_example.py
CHANGED
@@ -52,7 +52,7 @@
     return None
 
 def image_to_image(image_path, prompt=None, model=None, negative_prompt=None,
-                   guidance_scale=None, num_inference_steps=None):
+                   guidance_scale=None, num_inference_steps=None, use_controlnet=False):
     """
     Transform image using the API
     Only image_path is required, other parameters are optional and will use server defaults
@@ -76,6 +76,9 @@ def image_to_image(image_path, prompt=None, model=None, negative_prompt=None,
 
     if num_inference_steps is not None:
         data["num_inference_steps"] = num_inference_steps
+
+    if use_controlnet:
+        data["use_controlnet"] = "True"
 
     # Prepare the image file
     files = {
@@ -112,3 +115,9 @@
     # if result:
     #     result.save("img2img_output.png")
     #     print("Image saved as img2img_output.png")
+
+    # Example with ControlNet depth-based transformation:
+    # result = image_to_image("input.png", prompt="A futuristic cityscape", use_controlnet=True)
+    # if result:
+    #     result.save("controlnet_output.png")
+    #     print("Image saved as controlnet_output.png")
app.py
CHANGED
@@ -1,12 +1,16 @@
 import gradio as gr
 import config
 from inference import DiffusionInference
+from controlnet_pipeline import ControlNetPipeline
 from PIL import Image
 import io
 
 # Initialize the inference class
 inference = DiffusionInference()
 
+# Initialize the ControlNet pipeline
+controlnet = ControlNetPipeline()
+
 def text_to_image_fn(prompt, model, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
     """
     Handle text to image generation request
@@ -34,36 +38,47 @@
         print(error_msg)
         return None, error_msg
 
-def image_to_image_fn(image, prompt, model, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
+def image_to_image_fn(image, prompt, model, use_controlnet=False, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
     """
     Handle image to image transformation request
     """
     if image is None:
         return None, "No input image provided."
 
-    # Model validation - fallback to default if empty
-    if not model or model.strip() == '':
-        model = config.DEFAULT_IMG2IMG_MODEL
-
     # Handle empty prompt - use default if completely empty
     if prompt is None or prompt.strip() == "":
         prompt = config.DEFAULT_IMG2IMG_PROMPT
 
     try:
-        […]
+        if use_controlnet:
+            # Use ControlNet pipeline directly on the device
+            result = controlnet.generate(
+                prompt=prompt,
+                image=image,
+                negative_prompt=negative_prompt,
+                guidance_scale=float(guidance_scale),
+                num_inference_steps=int(num_inference_steps)
+            )
+            return result, None
+        else:
+            # Model validation - fallback to default if empty
+            if not model or model.strip() == '':
+                model = config.DEFAULT_IMG2IMG_MODEL
+
+            # Use regular inference API
+            result = inference.image_to_image(
+                image=image,
+                prompt=prompt,
+                model_name=model,
+                negative_prompt=negative_prompt,
+                guidance_scale=float(guidance_scale) if guidance_scale is not None else None,
+                num_inference_steps=int(num_inference_steps) if num_inference_steps is not None else None
+            )
+
+            if result is None:
+                return None, "No image was generated. Check the model and parameters."
+
+            return result, None
     except Exception as e:
         error_msg = f"Error: {str(e)}"
         print(error_msg)
@@ -102,7 +117,13 @@ with gr.Blocks(title="Diffusion Models") as app:
             img2img_input = gr.Image(type="pil", label="Input Image")
             img2img_prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", value=config.DEFAULT_IMG2IMG_PROMPT)
             img2img_negative = gr.Textbox(label="Negative Prompt (Optional)", placeholder="What to exclude from the image", value=config.DEFAULT_NEGATIVE_PROMPT)
-            […]
+
+            with gr.Row():
+                with gr.Column(scale=1):
+                    img2img_controlnet = gr.Checkbox(label="Use ControlNet (Depth)", value=False)
+                with gr.Column(scale=2):
+                    img2img_model = gr.Textbox(label="Model (used only if ControlNet is disabled)", placeholder=f"Enter model name", value=config.DEFAULT_IMG2IMG_MODEL, visible=True)
+
             img2img_guidance = gr.Slider(minimum=1.0, maximum=20.0, value=7.5, step=0.5, label="Guidance Scale")
             img2img_steps = gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Inference Steps")
             img2img_button = gr.Button("Transform Image")
@@ -113,9 +134,19 @@
 
         img2img_button.click(
             fn=image_to_image_fn,
-            inputs=[img2img_input, img2img_prompt, img2img_model, img2img_negative, img2img_guidance, img2img_steps],
+            inputs=[img2img_input, img2img_prompt, img2img_model, img2img_controlnet, img2img_negative, img2img_guidance, img2img_steps],
             outputs=[img2img_output, img2img_error]
         )
+
+        # Add visibility toggle for the model textbox based on ControlNet checkbox
+        def toggle_model_visibility(use_controlnet):
+            return not use_controlnet
+
+        img2img_controlnet.change(
+            fn=toggle_model_visibility,
+            inputs=[img2img_controlnet],
+            outputs=[img2img_model]
+        )
 
 # Launch the Gradio app
 if __name__ == "__main__":
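One detail worth flagging in the new visibility toggle: `toggle_model_visibility` returns a bare boolean into the `img2img_model` textbox, and in Gradio 3.x (the SDK version pinned in `spaces_config.json`) a plain return value is treated as the component's new value rather than a property change. A hedged sketch of how the same toggle is usually expressed with a component update; this is not part of the commit:

```python
import gradio as gr

def toggle_model_visibility(use_controlnet):
    # Return an update object so the checkbox shows/hides the model textbox
    # instead of overwriting its text with "True"/"False".
    return gr.update(visible=not use_controlnet)
```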
config.py
CHANGED
@@ -5,6 +5,8 @@ from dotenv import load_dotenv
 load_dotenv()
 
 # Hugging Face API token
+# First try to get from environment variables (Hugging Face Spaces secrets)
+# Then fall back to .env file for local development
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 
 # Default model for text to image
@@ -13,6 +15,11 @@ DEFAULT_TEXT2IMG_MODEL = "stabilityai/stable-diffusion-3-medium-diffusers"
 # Default model for image to image
 DEFAULT_IMG2IMG_MODEL = "stabilityai/stable-diffusion-xl-refiner-1.0"
 
+# ControlNet configuration
+USE_CONTROLNET = True  # Set to False to disable ControlNet in case of issues
+CONTROLNET_MODEL = "lllyasviel/sd-controlnet-depth"
+BASE_MODEL = "stable-diffusion-v1-5/stable-diffusion-v1-5"
+
 # Default prompts - used as placeholders in UI and defaults in API
 DEFAULT_TEXT2IMG_PROMPT = "A beautiful landscape with mountains and a lake"
 DEFAULT_IMG2IMG_PROMPT = "Transform this image with fantasy elements"
controlnet_pipeline.py
ADDED
@@ -0,0 +1,90 @@
+import torch
+import numpy as np
+from PIL import Image
+from transformers import pipeline
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
+from diffusers.utils import load_image
+import os
+import huggingface_hub
+import spaces
+import config
+
+class ControlNetPipeline:
+    def __init__(self):
+        """Initialize the ControlNet pipeline with lazy loading"""
+        self.depth_estimator = None
+        self.pipe = None
+        self.controlnet = None
+        self.is_initialized = False
+
+    @spaces.GPU
+    def initialize(self):
+        """Initialize the models with GPU acceleration"""
+        if self.is_initialized:
+            return
+
+        # Load depth estimator
+        self.depth_estimator = pipeline('depth-estimation')
+
+        # Load ControlNet model
+        self.controlnet = ControlNetModel.from_pretrained(
+            config.CONTROLNET_MODEL,
+            torch_dtype=torch.float16
+        )
+
+        # Load Stable Diffusion pipeline with ControlNet
+        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            config.BASE_MODEL,
+            controlnet=self.controlnet,
+            safety_checker=None,
+            torch_dtype=torch.float16
+        )
+
+        # Use more efficient scheduler
+        self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
+
+        # Enable memory optimizations
+        try:
+            self.pipe.enable_xformers_memory_efficient_attention()
+        except:
+            print("xformers not available, using default attention mechanism")
+
+        self.pipe.enable_model_cpu_offload()
+        self.is_initialized = True
+
+    @spaces.GPU
+    def process_image(self, image):
+        """Process the input image to generate depth map"""
+        # Ensure model is initialized
+        if not self.is_initialized:
+            self.initialize()
+
+        # Generate depth map
+        depth = self.depth_estimator(image)['depth']
+        depth_array = np.array(depth)
+        depth_array = depth_array[:, :, None]
+        depth_array = np.concatenate([depth_array, depth_array, depth_array], axis=2)
+        depth_image = Image.fromarray(depth_array)
+
+        return depth_image
+
+    @spaces.GPU
+    def generate(self, prompt, image, negative_prompt=None, guidance_scale=7.5, num_inference_steps=20):
+        """Generate an image using ControlNet with the provided prompt and input image"""
+        # Ensure model is initialized
+        if not self.is_initialized:
+            self.initialize()
+
+        # Process image to get depth map
+        depth_image = self.process_image(image)
+
+        # Generate the image
+        output = self.pipe(
+            prompt=prompt,
+            image=depth_image,
+            negative_prompt=negative_prompt,
+            guidance_scale=float(guidance_scale),
+            num_inference_steps=int(num_inference_steps)
+        )
+
+        return output.images[0]
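For standalone testing outside the API and the Gradio UI, the new class can be driven directly. A minimal sketch; the input path and prompt are placeholders, and the `spaces.GPU`-decorated methods expect a GPU-backed Spaces environment:

```python
from PIL import Image
from controlnet_pipeline import ControlNetPipeline

pipeline = ControlNetPipeline()       # models are loaded lazily on first use
source = Image.open("input.png")      # placeholder input image

result = pipeline.generate(
    prompt="A futuristic cityscape",  # placeholder prompt
    image=source,
    negative_prompt="blurry, low quality",
    guidance_scale=7.5,
    num_inference_steps=20,
)
result.save("controlnet_output.png")
```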
main.py
CHANGED
@@ -36,8 +36,13 @@ def main():
 
     # Check if HF_TOKEN is set
     if not config.HF_TOKEN:
-        print("
-        print("
+        print("\n")
+        print("*" * 80)
+        print("WARNING: HF_TOKEN environment variable is not set!")
+        print("* For local development: Create a .env file with HF_TOKEN=your_token")
+        print("* For Hugging Face Spaces: Add HF_TOKEN as a secret in your Space settings")
+        print("*" * 80)
+        print("\n")
 
     if args.mode == "all":
         # Run both API and UI in separate threads
requirements.txt
CHANGED
@@ -4,3 +4,9 @@ Pillow
 fastapi
 uvicorn
 python-dotenv
+torch
+transformers
+diffusers
+spaces
+xformers
+numpy
spaces_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "sdk": "gradio",
   "sdk_version": "3.50.2",
-  "app_file": "
+  "app_file": "main.py",
   "models": [
     {
       "model_name": "stabilityai/stable-diffusion-2-1",
@@ -10,6 +10,14 @@
     {
       "model_name": "lllyasviel/sd-controlnet-depth",
       "model_class": "diffusers"
+    },
+    {
+      "model_name": "stable-diffusion-v1-5/stable-diffusion-v1-5",
+      "model_class": "diffusers"
+    },
+    {
+      "model_name": "stabilityai/stable-diffusion-xl-refiner-1.0",
+      "model_class": "diffusers"
     }
   ],
   "resources": {