ovedrive committed
Commit 0443b19 · 1 Parent(s): 479f11d

merge controlnet

Files changed (11):
  1. .env.example +1 -1
  2. .gitignore +2 -1
  3. Project.md +35 -2
  4. api.py +37 -21
  5. api_example.py +10 -1
  6. app.py +52 -21
  7. config.py +7 -0
  8. controlnet_pipeline.py +90 -0
  9. main.py +7 -2
  10. requirements.txt +6 -0
  11. spaces_config.json +9 -1
.env.example CHANGED
@@ -1,5 +1,5 @@
  # Hugging Face token
- HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxx
+ HF_TOKEN=your_token_here
 
  # API settings
  API_HOST=0.0.0.0
.gitignore CHANGED
@@ -1,4 +1,5 @@
  .venv
  *.pyc
  __pycache__
- .env
+ .env
+ *.env
Project.md CHANGED
@@ -1,11 +1,12 @@
  # Diffusion Models App
 
- A Python application that uses Hugging Face inference endpoints for text-to-image and image-to-image generation with a Gradio UI and API endpoints.
+ A Python application that uses Hugging Face inference endpoints and on-device models for text-to-image and image-to-image generation with a Gradio UI and API endpoints.
 
  ## Features
 
  - Text-to-image generation
  - Image-to-image transformation with optional prompt
+ - ControlNet depth-based image transformation
  - Gradio UI for interactive use
  - API endpoints for integration with other applications
  - Configurable models via text input
@@ -17,16 +18,26 @@ A Python application that uses Hugging Face inference endpoints for text-to-imag
  - `app.py` - Gradio UI implementation
  - `api.py` - FastAPI server for API endpoints
  - `inference.py` - Core functionality for HF inference
+ - `controlnet_pipeline.py` - ControlNet depth model pipeline
  - `config.py` - Configuration and settings
  - `requirements.txt` - Dependencies
 
  ## Setup & Usage
 
+ ### Local Development
  1. Clone the repository
- 2. Create a .env file with your Hugging Face token (copy from .env.example)
+ 2. Create a `.env` file with your Hugging Face token (copy from `.env.example`)
  3. Install dependencies: `pip install -r requirements.txt`
  4. Run the application: `python main.py`
 
+ ### Hugging Face Spaces Deployment
+ 1. Never commit the `.env` file with your token to the repository!
+ 2. Instead, add your HF_TOKEN as a secret in the Spaces UI:
+    - Go to your Space's Settings tab
+    - Navigate to Repository Secrets
+    - Add a secret named `HF_TOKEN` with your token as the value
+ 3. The application will automatically use this secret in the Spaces environment
+
  ## Running Options
 
  - Run both UI and API: `python main.py`
@@ -47,6 +58,28 @@ The application includes defaults for:
 
  These defaults are applied to both the Gradio UI and API endpoints for consistency.
 
+ ## ControlNet Implementation
+
+ The application now supports running a ControlNet depth model directly on the Hugging Face Spaces GPU using the `spaces.GPU` decorator. This feature allows for:
+
+ 1. **On-device processing**: Instead of relying solely on remote inference endpoints, the app can now perform image transformations using the local GPU.
+
+ 2. **Depth-based transformations**: The ControlNet implementation extracts depth information from the input image, allowing for more structure-preserving transformations.
+
+ 3. **Integration with existing workflow**: The ControlNet option is seamlessly integrated into the image-to-image tab via a simple checkbox.
+
+ ### How it works:
+
+ 1. When a user uploads an image and enables the ControlNet option, the app processes the image through a depth estimator.
+ 2. The depth map is then used by the ControlNet model to guide the image generation process.
+ 3. The `spaces.GPU` decorator ensures that these operations run on the GPU for optimal performance.
+ 4. The resulting image maintains the spatial structure of the original while applying the creative transformation specified in the prompt.
+
+ The implementation uses:
+ - `stable-diffusion-v1-5` as the base model
+ - `lllyasviel/sd-controlnet-depth` as the ControlNet model
+ - The HuggingFace Transformers depth estimation pipeline
+
  ## Environment Variables
 
  - `HF_TOKEN` - Your Hugging Face API token
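As a companion to the "How it works" notes above, here is a minimal, illustrative sketch of the depth-guided flow. It only mirrors the `controlnet_pipeline.py` file added in this commit; the model IDs come from the new `config.py`, while `input.png`, the prompt, and the CUDA device are placeholders/assumptions.

```python
# Illustrative sketch of the depth-guided ControlNet flow described above.
# Model IDs match this commit's config.py; "input.png" and the prompt are placeholders.
import numpy as np
import torch
from PIL import Image
from transformers import pipeline
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel

# 1. Extract a depth map from the input image and expand it to 3 channels.
depth_estimator = pipeline("depth-estimation")
depth = np.array(depth_estimator(Image.open("input.png"))["depth"])
control_image = Image.fromarray(np.stack([depth, depth, depth], axis=2))

# 2. Let the depth map guide generation so the spatial structure is preserved.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")  # assumes a GPU is available, as on a Spaces GPU runtime

result = pipe(
    prompt="Transform this image with fantasy elements",
    image=control_image,
    num_inference_steps=20,
).images[0]
result.save("output.png")
```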
api.py CHANGED
@@ -5,12 +5,16 @@ import io
  import uvicorn
  import config
  from inference import DiffusionInference
+ from controlnet_pipeline import ControlNetPipeline
 
  app = FastAPI(title="Diffusion Models API")
 
  # Initialize the inference class
  inference = DiffusionInference()
 
+ # Initialize the ControlNet pipeline
+ controlnet = ControlNetPipeline()
+
  @app.get("/")
  async def root():
      return {"message": "Diffusion Models API is running"}
@@ -58,6 +62,7 @@ async def image_to_image(
      image: UploadFile = File(...),
      prompt: str = Form(config.DEFAULT_IMG2IMG_PROMPT),
      model: str = Form(config.DEFAULT_IMG2IMG_MODEL),
+     use_controlnet: bool = Form(False),
      negative_prompt: str = Form(config.DEFAULT_NEGATIVE_PROMPT),
      guidance_scale: float = Form(7.5),
      num_inference_steps: int = Form(50)
@@ -70,27 +75,38 @@
      contents = await image.read()
      input_image = Image.open(io.BytesIO(contents))
 
-     # Use default model if not specified or empty
-     if not model or model.strip() == '':
-         model = config.DEFAULT_IMG2IMG_MODEL
-
-     # Use default prompt if not specified or empty
-     if not prompt or prompt.strip() == '':
-         prompt = config.DEFAULT_IMG2IMG_PROMPT
-
-     # Use default negative prompt if not specified or empty
-     if not negative_prompt or negative_prompt.strip() == '':
-         negative_prompt = config.DEFAULT_NEGATIVE_PROMPT
-
-     # Call the inference module
-     result = inference.image_to_image(
-         image=input_image,
-         prompt=prompt,
-         model_name=model,
-         negative_prompt=negative_prompt,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps
-     )
+     # Use ControlNet if specified
+     if use_controlnet and config.USE_CONTROLNET:
+         # Process with ControlNet pipeline
+         result = controlnet.generate(
+             prompt=prompt,
+             image=input_image,
+             negative_prompt=negative_prompt,
+             guidance_scale=guidance_scale,
+             num_inference_steps=num_inference_steps
+         )
+     else:
+         # Use default model if not specified or empty
+         if not model or model.strip() == '':
+             model = config.DEFAULT_IMG2IMG_MODEL
+
+         # Use default prompt if not specified or empty
+         if not prompt or prompt.strip() == '':
+             prompt = config.DEFAULT_IMG2IMG_PROMPT
+
+         # Use default negative prompt if not specified or empty
+         if not negative_prompt or negative_prompt.strip() == '':
+             negative_prompt = config.DEFAULT_NEGATIVE_PROMPT
+
+         # Call the inference module
+         result = inference.image_to_image(
+             image=input_image,
+             prompt=prompt,
+             model_name=model,
+             negative_prompt=negative_prompt,
+             guidance_scale=guidance_scale,
+             num_inference_steps=num_inference_steps
+         )
 
      # Convert PIL image to bytes
      img_byte_arr = io.BytesIO()
api_example.py CHANGED
@@ -52,7 +52,7 @@ def text_to_image(prompt=None, model=None, negative_prompt=None, guidance_scale=
          return None
 
  def image_to_image(image_path, prompt=None, model=None, negative_prompt=None,
-                    guidance_scale=None, num_inference_steps=None):
+                    guidance_scale=None, num_inference_steps=None, use_controlnet=False):
      """
      Transform image using the API
      Only image_path is required, other parameters are optional and will use server defaults
@@ -76,6 +76,9 @@ def image_to_image(image_path, prompt=None, model=None, negative_prompt=None,
 
      if num_inference_steps is not None:
          data["num_inference_steps"] = num_inference_steps
+
+     if use_controlnet:
+         data["use_controlnet"] = "True"
 
      # Prepare the image file
      files = {
@@ -112,3 +115,9 @@ if __name__ == "__main__":
      # if result:
      #     result.save("img2img_output.png")
      #     print("Image saved as img2img_output.png")
+
+     # Example with ControlNet depth-based transformation:
+     # result = image_to_image("input.png", prompt="A futuristic cityscape", use_controlnet=True)
+     # if result:
+     #     result.save("controlnet_output.png")
+     #     print("Image saved as controlnet_output.png")
app.py CHANGED
@@ -1,12 +1,16 @@
  import gradio as gr
  import config
  from inference import DiffusionInference
+ from controlnet_pipeline import ControlNetPipeline
  from PIL import Image
  import io
 
  # Initialize the inference class
  inference = DiffusionInference()
 
+ # Initialize the ControlNet pipeline
+ controlnet = ControlNetPipeline()
+
  def text_to_image_fn(prompt, model, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
      """
      Handle text to image generation request
@@ -34,36 +38,47 @@ def text_to_image_fn(prompt, model, negative_prompt=None, guidance_scale=7.5, nu
          print(error_msg)
          return None, error_msg
 
- def image_to_image_fn(image, prompt, model, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
+ def image_to_image_fn(image, prompt, model, use_controlnet=False, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
      """
      Handle image to image transformation request
      """
      if image is None:
          return None, "No input image provided."
 
-     # Model validation - fallback to default if empty
-     if not model or model.strip() == '':
-         model = config.DEFAULT_IMG2IMG_MODEL
-
      # Handle empty prompt - use default if completely empty
      if prompt is None or prompt.strip() == "":
          prompt = config.DEFAULT_IMG2IMG_PROMPT
 
      try:
-         # Call the inference module with explicit parameters
-         result = inference.image_to_image(
-             image=image,
-             prompt=prompt, # This can be None
-             model_name=model,
-             negative_prompt=negative_prompt,
-             guidance_scale=float(guidance_scale) if guidance_scale is not None else None,
-             num_inference_steps=int(num_inference_steps) if num_inference_steps is not None else None
-         )
-
-         if result is None:
-             return None, "No image was generated. Check the model and parameters."
-
-         return result, None
+         if use_controlnet:
+             # Use ControlNet pipeline directly on the device
+             result = controlnet.generate(
+                 prompt=prompt,
+                 image=image,
+                 negative_prompt=negative_prompt,
+                 guidance_scale=float(guidance_scale),
+                 num_inference_steps=int(num_inference_steps)
+             )
+             return result, None
+         else:
+             # Model validation - fallback to default if empty
+             if not model or model.strip() == '':
+                 model = config.DEFAULT_IMG2IMG_MODEL
+
+             # Use regular inference API
+             result = inference.image_to_image(
+                 image=image,
+                 prompt=prompt,
+                 model_name=model,
+                 negative_prompt=negative_prompt,
+                 guidance_scale=float(guidance_scale) if guidance_scale is not None else None,
+                 num_inference_steps=int(num_inference_steps) if num_inference_steps is not None else None
+             )
+
+             if result is None:
+                 return None, "No image was generated. Check the model and parameters."
+
+             return result, None
      except Exception as e:
          error_msg = f"Error: {str(e)}"
         print(error_msg)
@@ -102,7 +117,13 @@ with gr.Blocks(title="Diffusion Models") as app:
  img2img_input = gr.Image(type="pil", label="Input Image")
  img2img_prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", value=config.DEFAULT_IMG2IMG_PROMPT)
  img2img_negative = gr.Textbox(label="Negative Prompt (Optional)", placeholder="What to exclude from the image", value=config.DEFAULT_NEGATIVE_PROMPT)
- img2img_model = gr.Textbox(label="Model", placeholder=f"Enter model name", value=config.DEFAULT_IMG2IMG_MODEL)
+
+ with gr.Row():
+     with gr.Column(scale=1):
+         img2img_controlnet = gr.Checkbox(label="Use ControlNet (Depth)", value=False)
+     with gr.Column(scale=2):
+         img2img_model = gr.Textbox(label="Model (used only if ControlNet is disabled)", placeholder=f"Enter model name", value=config.DEFAULT_IMG2IMG_MODEL, visible=True)
+
  img2img_guidance = gr.Slider(minimum=1.0, maximum=20.0, value=7.5, step=0.5, label="Guidance Scale")
  img2img_steps = gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Inference Steps")
  img2img_button = gr.Button("Transform Image")
@@ -113,9 +134,19 @@
 
  img2img_button.click(
      fn=image_to_image_fn,
-     inputs=[img2img_input, img2img_prompt, img2img_model, img2img_negative, img2img_guidance, img2img_steps],
+     inputs=[img2img_input, img2img_prompt, img2img_model, img2img_controlnet, img2img_negative, img2img_guidance, img2img_steps],
      outputs=[img2img_output, img2img_error]
  )
+
+ # Add visibility toggle for the model textbox based on ControlNet checkbox
+ def toggle_model_visibility(use_controlnet):
+     return not use_controlnet
+
+ img2img_controlnet.change(
+     fn=toggle_model_visibility,
+     inputs=[img2img_controlnet],
+     outputs=[img2img_model]
+ )
 
  # Launch the Gradio app
  if __name__ == "__main__":
config.py CHANGED
@@ -5,6 +5,8 @@ from dotenv import load_dotenv
  load_dotenv()
 
  # Hugging Face API token
+ # First try to get from environment variables (Hugging Face Spaces secrets)
+ # Then fall back to .env file for local development
  HF_TOKEN = os.getenv("HF_TOKEN", "")
 
  # Default model for text to image
@@ -13,6 +15,11 @@ DEFAULT_TEXT2IMG_MODEL = "stabilityai/stable-diffusion-3-medium-diffusers"
  # Default model for image to image
  DEFAULT_IMG2IMG_MODEL = "stabilityai/stable-diffusion-xl-refiner-1.0"
 
+ # ControlNet configuration
+ USE_CONTROLNET = True  # Set to False to disable ControlNet in case of issues
+ CONTROLNET_MODEL = "lllyasviel/sd-controlnet-depth"
+ BASE_MODEL = "stable-diffusion-v1-5/stable-diffusion-v1-5"
+
  # Default prompts - used as placeholders in UI and defaults in API
  DEFAULT_TEXT2IMG_PROMPT = "A beautiful landscape with mountains and a lake"
  DEFAULT_IMG2IMG_PROMPT = "Transform this image with fantasy elements"
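The token-resolution comment added above relies on python-dotenv's default behavior: `load_dotenv()` does not override variables that already exist in the process environment, so a Spaces secret takes precedence over a value in `.env`. A tiny illustrative check (the token string is made up):

```python
# Illustrative only: Spaces secrets win because load_dotenv() keeps existing env vars.
import os
from dotenv import load_dotenv

os.environ["HF_TOKEN"] = "token-from-spaces-secret"  # as set by the Spaces runtime
load_dotenv()                                        # default override=False keeps the existing value
assert os.getenv("HF_TOKEN", "") == "token-from-spaces-secret"
```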
controlnet_pipeline.py ADDED
@@ -0,0 +1,90 @@
+ import torch
+ import numpy as np
+ from PIL import Image
+ from transformers import pipeline
+ from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
+ from diffusers.utils import load_image
+ import os
+ import huggingface_hub
+ import spaces
+ import config
+
+ class ControlNetPipeline:
+     def __init__(self):
+         """Initialize the ControlNet pipeline with lazy loading"""
+         self.depth_estimator = None
+         self.pipe = None
+         self.controlnet = None
+         self.is_initialized = False
+
+     @spaces.GPU
+     def initialize(self):
+         """Initialize the models with GPU acceleration"""
+         if self.is_initialized:
+             return
+
+         # Load depth estimator
+         self.depth_estimator = pipeline('depth-estimation')
+
+         # Load ControlNet model
+         self.controlnet = ControlNetModel.from_pretrained(
+             config.CONTROLNET_MODEL,
+             torch_dtype=torch.float16
+         )
+
+         # Load Stable Diffusion pipeline with ControlNet
+         self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
+             config.BASE_MODEL,
+             controlnet=self.controlnet,
+             safety_checker=None,
+             torch_dtype=torch.float16
+         )
+
+         # Use more efficient scheduler
+         self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
+
+         # Enable memory optimizations
+         try:
+             self.pipe.enable_xformers_memory_efficient_attention()
+         except:
+             print("xformers not available, using default attention mechanism")
+
+         self.pipe.enable_model_cpu_offload()
+         self.is_initialized = True
+
+     @spaces.GPU
+     def process_image(self, image):
+         """Process the input image to generate depth map"""
+         # Ensure model is initialized
+         if not self.is_initialized:
+             self.initialize()
+
+         # Generate depth map
+         depth = self.depth_estimator(image)['depth']
+         depth_array = np.array(depth)
+         depth_array = depth_array[:, :, None]
+         depth_array = np.concatenate([depth_array, depth_array, depth_array], axis=2)
+         depth_image = Image.fromarray(depth_array)
+
+         return depth_image
+
+     @spaces.GPU
+     def generate(self, prompt, image, negative_prompt=None, guidance_scale=7.5, num_inference_steps=20):
+         """Generate an image using ControlNet with the provided prompt and input image"""
+         # Ensure model is initialized
+         if not self.is_initialized:
+             self.initialize()
+
+         # Process image to get depth map
+         depth_image = self.process_image(image)
+
+         # Generate the image
+         output = self.pipe(
+             prompt=prompt,
+             image=depth_image,
+             negative_prompt=negative_prompt,
+             guidance_scale=float(guidance_scale),
+             num_inference_steps=int(num_inference_steps)
+         )
+
+         return output.images[0]
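A short usage sketch of the class added above: the input file, the negative prompt, and the output path are placeholders; the prompt is reused from `api_example.py`; the heavy models load lazily on the first `generate()` call.

```python
# Hypothetical local usage of ControlNetPipeline (file names and prompts are placeholders).
from PIL import Image
from controlnet_pipeline import ControlNetPipeline

controlnet = ControlNetPipeline()        # nothing is loaded yet (lazy initialization)
source = Image.open("input.png")         # placeholder input image

result = controlnet.generate(
    prompt="A futuristic cityscape",     # example prompt from api_example.py
    image=source,
    negative_prompt="blurry, low quality",  # illustrative
    guidance_scale=7.5,
    num_inference_steps=20,
)
result.save("controlnet_output.png")
```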
main.py CHANGED
@@ -36,8 +36,13 @@ def main():
 
      # Check if HF_TOKEN is set
      if not config.HF_TOKEN:
-         print("Warning: HF_TOKEN environment variable is not set. Please set it for API access.")
-         print("You can create a .env file with HF_TOKEN=your_token or set it in your environment.")
+         print("\n")
+         print("*" * 80)
+         print("WARNING: HF_TOKEN environment variable is not set!")
+         print("* For local development: Create a .env file with HF_TOKEN=your_token")
+         print("* For Hugging Face Spaces: Add HF_TOKEN as a secret in your Space settings")
+         print("*" * 80)
+         print("\n")
 
      if args.mode == "all":
          # Run both API and UI in separate threads
requirements.txt CHANGED
@@ -4,3 +4,9 @@ Pillow
  fastapi
  uvicorn
  python-dotenv
+ torch
+ transformers
+ diffusers
+ spaces
+ xformers
+ numpy
spaces_config.json CHANGED
@@ -1,7 +1,7 @@
  {
    "sdk": "gradio",
    "sdk_version": "3.50.2",
-   "app_file": "app.py",
+   "app_file": "main.py",
    "models": [
      {
        "model_name": "stabilityai/stable-diffusion-2-1",
@@ -10,6 +10,14 @@
      {
        "model_name": "lllyasviel/sd-controlnet-depth",
        "model_class": "diffusers"
+     },
+     {
+       "model_name": "stable-diffusion-v1-5/stable-diffusion-v1-5",
+       "model_class": "diffusers"
+     },
+     {
+       "model_name": "stabilityai/stable-diffusion-xl-refiner-1.0",
+       "model_class": "diffusers"
      }
    ],
    "resources": {