merge controlnet
- .env.example +1 -1
- .gitignore +2 -1
- Project.md +35 -2
- api.py +37 -21
- api_example.py +10 -1
- app.py +52 -21
- config.py +7 -0
- controlnet_pipeline.py +90 -0
- main.py +7 -2
- requirements.txt +6 -0
- spaces_config.json +9 -1
.env.example
CHANGED
@@ -1,5 +1,5 @@
 # Hugging Face token
-HF_TOKEN=
+HF_TOKEN=your_token_here
 
 # API settings
 API_HOST=0.0.0.0
.gitignore
CHANGED
@@ -1,4 +1,5 @@
 .venv
 *.pyc
 __pycache__
-.env
+.env
+*.env
Project.md
CHANGED
@@ -1,11 +1,12 @@
 # Diffusion Models App
 
-A Python application that uses Hugging Face inference endpoints for text-to-image and image-to-image generation with a Gradio UI and API endpoints.
+A Python application that uses Hugging Face inference endpoints and on-device models for text-to-image and image-to-image generation with a Gradio UI and API endpoints.
 
 ## Features
 
 - Text-to-image generation
 - Image-to-image transformation with optional prompt
+- ControlNet depth-based image transformation
 - Gradio UI for interactive use
 - API endpoints for integration with other applications
 - Configurable models via text input
@@ -17,16 +18,26 @@
 - `app.py` - Gradio UI implementation
 - `api.py` - FastAPI server for API endpoints
 - `inference.py` - Core functionality for HF inference
+- `controlnet_pipeline.py` - ControlNet depth model pipeline
 - `config.py` - Configuration and settings
 - `requirements.txt` - Dependencies
 
 ## Setup & Usage
 
+### Local Development
 1. Clone the repository
-2. Create a
+2. Create a `.env` file with your Hugging Face token (copy from `.env.example`)
 3. Install dependencies: `pip install -r requirements.txt`
 4. Run the application: `python main.py`
 
+### Hugging Face Spaces Deployment
+1. Never commit the `.env` file with your token to the repository!
+2. Instead, add your HF_TOKEN as a secret in the Spaces UI:
+   - Go to your Space's Settings tab
+   - Navigate to Repository Secrets
+   - Add a secret named `HF_TOKEN` with your token as the value
+3. The application will automatically use this secret in the Spaces environment
+
 ## Running Options
 
 - Run both UI and API: `python main.py`
@@ -47,6 +58,28 @@ The application includes defaults for:
 
 These defaults are applied to both the Gradio UI and API endpoints for consistency.
 
+## ControlNet Implementation
+
+The application now supports running a ControlNet depth model directly on the Hugging Face Spaces GPU using the `spaces.GPU` decorator. This feature allows for:
+
+1. **On-device processing**: Instead of relying solely on remote inference endpoints, the app can now perform image transformations using the local GPU.
+
+2. **Depth-based transformations**: The ControlNet implementation extracts depth information from the input image, allowing for more structure-preserving transformations.
+
+3. **Integration with existing workflow**: The ControlNet option is seamlessly integrated into the image-to-image tab via a simple checkbox.
+
+### How it works:
+
+1. When a user uploads an image and enables the ControlNet option, the app processes the image through a depth estimator.
+2. The depth map is then used by the ControlNet model to guide the image generation process.
+3. The `spaces.GPU` decorator ensures that these operations run on the GPU for optimal performance.
+4. The resulting image maintains the spatial structure of the original while applying the creative transformation specified in the prompt.
+
+The implementation uses:
+- `stable-diffusion-v1-5` as the base model
+- `lllyasviel/sd-controlnet-depth` as the ControlNet model
+- The HuggingFace Transformers depth estimation pipeline
+
 ## Environment Variables
 
 - `HF_TOKEN` - Your Hugging Face API token
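The depth-conditioning flow described in the new README section can be sketched in a few lines of plain transformers/diffusers code. This is a minimal illustration mirroring the `controlnet_pipeline.py` file added in this commit; the input file name and prompt are placeholders:

```python
import numpy as np
import torch
from PIL import Image
from transformers import pipeline
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel

# 1. Estimate a depth map for the uploaded image
depth_estimator = pipeline("depth-estimation")
depth = depth_estimator(Image.open("input.png"))["depth"]  # "input.png" is a placeholder

# 2. Stack the single-channel depth map into a 3-channel conditioning image
depth_array = np.array(depth)[:, :, None]
depth_image = Image.fromarray(np.concatenate([depth_array] * 3, axis=2))

# 3. Let the depth map guide generation through ControlNet
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

result = pipe(
    "A futuristic cityscape",  # placeholder prompt
    image=depth_image,
    num_inference_steps=20,
).images[0]
result.save("controlnet_output.png")
```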
api.py
CHANGED
@@ -5,12 +5,16 @@ import io
 import uvicorn
 import config
 from inference import DiffusionInference
+from controlnet_pipeline import ControlNetPipeline
 
 app = FastAPI(title="Diffusion Models API")
 
 # Initialize the inference class
 inference = DiffusionInference()
 
+# Initialize the ControlNet pipeline
+controlnet = ControlNetPipeline()
+
 @app.get("/")
 async def root():
     return {"message": "Diffusion Models API is running"}
@@ -58,6 +62,7 @@ async def image_to_image(
     image: UploadFile = File(...),
     prompt: str = Form(config.DEFAULT_IMG2IMG_PROMPT),
     model: str = Form(config.DEFAULT_IMG2IMG_MODEL),
+    use_controlnet: bool = Form(False),
     negative_prompt: str = Form(config.DEFAULT_NEGATIVE_PROMPT),
     guidance_scale: float = Form(7.5),
     num_inference_steps: int = Form(50)
@@ -70,27 +75,38 @@
     contents = await image.read()
     input_image = Image.open(io.BytesIO(contents))
 
-    # Use default model if not specified or empty
-    if not model or model.strip() == '':
-        model = config.DEFAULT_IMG2IMG_MODEL
-
-    # Use default prompt if not specified or empty
-    if not prompt or prompt.strip() == '':
-        prompt = config.DEFAULT_IMG2IMG_PROMPT
-
-    # Use default negative prompt if not specified or empty
-    if not negative_prompt or negative_prompt.strip() == '':
-        negative_prompt = config.DEFAULT_NEGATIVE_PROMPT
-
-    # Call the inference module
-    result = inference.image_to_image(
-        image=input_image,
-        prompt=prompt,
-        model_name=model,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps
-    )
+    # Use ControlNet if specified
+    if use_controlnet and config.USE_CONTROLNET:
+        # Process with ControlNet pipeline
+        result = controlnet.generate(
+            prompt=prompt,
+            image=input_image,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps
+        )
+    else:
+        # Use default model if not specified or empty
+        if not model or model.strip() == '':
+            model = config.DEFAULT_IMG2IMG_MODEL
+
+        # Use default prompt if not specified or empty
+        if not prompt or prompt.strip() == '':
+            prompt = config.DEFAULT_IMG2IMG_PROMPT
+
+        # Use default negative prompt if not specified or empty
+        if not negative_prompt or negative_prompt.strip() == '':
+            negative_prompt = config.DEFAULT_NEGATIVE_PROMPT
+
+        # Call the inference module
+        result = inference.image_to_image(
+            image=input_image,
+            prompt=prompt,
+            model_name=model,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps
+        )
 
     # Convert PIL image to bytes
     img_byte_arr = io.BytesIO()
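Calling the updated endpoint from another application only needs one extra form field. A minimal sketch with the `requests` library; the base URL, port, and the `/image-to-image` route name are assumptions (they are not shown in this diff), as is the response being returned as raw image bytes:

```python
import io

import requests
from PIL import Image

# Assumed endpoint location; adjust to wherever api.py is actually served.
API_URL = "http://localhost:8000/image-to-image"

with open("input.png", "rb") as f:  # placeholder input file
    response = requests.post(
        API_URL,
        files={"image": ("input.png", f, "image/png")},
        data={
            "prompt": "A futuristic cityscape",
            "use_controlnet": "True",  # new form field added by this commit
            "guidance_scale": "7.5",
            "num_inference_steps": "30",
        },
    )

response.raise_for_status()
Image.open(io.BytesIO(response.content)).save("controlnet_output.png")
```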
api_example.py
CHANGED
@@ -52,7 +52,7 @@
     return None
 
 def image_to_image(image_path, prompt=None, model=None, negative_prompt=None,
-                   guidance_scale=None, num_inference_steps=None):
+                   guidance_scale=None, num_inference_steps=None, use_controlnet=False):
     """
     Transform image using the API
     Only image_path is required, other parameters are optional and will use server defaults
@@ -76,6 +76,9 @@ def image_to_image(image_path, prompt=None, model=None, negative_prompt=None,
 
     if num_inference_steps is not None:
         data["num_inference_steps"] = num_inference_steps
+
+    if use_controlnet:
+        data["use_controlnet"] = "True"
 
     # Prepare the image file
     files = {
@@ -112,3 +115,9 @@
     # if result:
     #     result.save("img2img_output.png")
     #     print("Image saved as img2img_output.png")
+
+    # Example with ControlNet depth-based transformation:
+    # result = image_to_image("input.png", prompt="A futuristic cityscape", use_controlnet=True)
+    # if result:
+    #     result.save("controlnet_output.png")
+    #     print("Image saved as controlnet_output.png")
app.py
CHANGED
@@ -1,12 +1,16 @@
 import gradio as gr
 import config
 from inference import DiffusionInference
+from controlnet_pipeline import ControlNetPipeline
 from PIL import Image
 import io
 
 # Initialize the inference class
 inference = DiffusionInference()
 
+# Initialize the ControlNet pipeline
+controlnet = ControlNetPipeline()
+
 def text_to_image_fn(prompt, model, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
     """
     Handle text to image generation request
@@ -34,36 +38,47 @@
         print(error_msg)
         return None, error_msg
 
-def image_to_image_fn(image, prompt, model, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
+def image_to_image_fn(image, prompt, model, use_controlnet=False, negative_prompt=None, guidance_scale=7.5, num_inference_steps=50):
     """
     Handle image to image transformation request
     """
     if image is None:
         return None, "No input image provided."
 
-    # Model validation - fallback to default if empty
-    if not model or model.strip() == '':
-        model = config.DEFAULT_IMG2IMG_MODEL
-
     # Handle empty prompt - use default if completely empty
     if prompt is None or prompt.strip() == "":
         prompt = config.DEFAULT_IMG2IMG_PROMPT
 
     try:
-        […]
+        if use_controlnet:
+            # Use ControlNet pipeline directly on the device
+            result = controlnet.generate(
+                prompt=prompt,
+                image=image,
+                negative_prompt=negative_prompt,
+                guidance_scale=float(guidance_scale),
+                num_inference_steps=int(num_inference_steps)
+            )
+            return result, None
+        else:
+            # Model validation - fallback to default if empty
+            if not model or model.strip() == '':
+                model = config.DEFAULT_IMG2IMG_MODEL
+
+            # Use regular inference API
+            result = inference.image_to_image(
+                image=image,
+                prompt=prompt,
+                model_name=model,
+                negative_prompt=negative_prompt,
+                guidance_scale=float(guidance_scale) if guidance_scale is not None else None,
+                num_inference_steps=int(num_inference_steps) if num_inference_steps is not None else None
+            )
+
+            if result is None:
+                return None, "No image was generated. Check the model and parameters."
+
+            return result, None
     except Exception as e:
         error_msg = f"Error: {str(e)}"
         print(error_msg)
@@ -102,7 +117,13 @@ with gr.Blocks(title="Diffusion Models") as app:
             img2img_input = gr.Image(type="pil", label="Input Image")
             img2img_prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", value=config.DEFAULT_IMG2IMG_PROMPT)
             img2img_negative = gr.Textbox(label="Negative Prompt (Optional)", placeholder="What to exclude from the image", value=config.DEFAULT_NEGATIVE_PROMPT)
-            […]
+
+            with gr.Row():
+                with gr.Column(scale=1):
+                    img2img_controlnet = gr.Checkbox(label="Use ControlNet (Depth)", value=False)
+                with gr.Column(scale=2):
+                    img2img_model = gr.Textbox(label="Model (used only if ControlNet is disabled)", placeholder=f"Enter model name", value=config.DEFAULT_IMG2IMG_MODEL, visible=True)
+
             img2img_guidance = gr.Slider(minimum=1.0, maximum=20.0, value=7.5, step=0.5, label="Guidance Scale")
             img2img_steps = gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Inference Steps")
             img2img_button = gr.Button("Transform Image")
@@ -113,9 +134,19 @@
 
         img2img_button.click(
             fn=image_to_image_fn,
-            inputs=[img2img_input, img2img_prompt, img2img_model, img2img_negative, img2img_guidance, img2img_steps],
+            inputs=[img2img_input, img2img_prompt, img2img_model, img2img_controlnet, img2img_negative, img2img_guidance, img2img_steps],
             outputs=[img2img_output, img2img_error]
         )
+
+        # Add visibility toggle for the model textbox based on ControlNet checkbox
+        def toggle_model_visibility(use_controlnet):
+            return not use_controlnet
+
+        img2img_controlnet.change(
+            fn=toggle_model_visibility,
+            inputs=[img2img_controlnet],
+            outputs=[img2img_model]
+        )
 
 # Launch the Gradio app
 if __name__ == "__main__":
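One detail worth flagging in the new visibility toggle: `toggle_model_visibility` returns a bare boolean into the `img2img_model` textbox, and in Gradio 3.x (the SDK version pinned in `spaces_config.json`) a plain return value is treated as the component's new value rather than a property change. A hedged sketch of how the same toggle is usually expressed with a component update; this is not part of the commit:

```python
import gradio as gr

def toggle_model_visibility(use_controlnet):
    # Return an update object so the checkbox shows/hides the model textbox
    # instead of overwriting its text with "True"/"False".
    return gr.update(visible=not use_controlnet)
```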
config.py
CHANGED
@@ -5,6 +5,8 @@ from dotenv import load_dotenv
 load_dotenv()
 
 # Hugging Face API token
+# First try to get from environment variables (Hugging Face Spaces secrets)
+# Then fall back to .env file for local development
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 
 # Default model for text to image
@@ -13,6 +15,11 @@ DEFAULT_TEXT2IMG_MODEL = "stabilityai/stable-diffusion-3-medium-diffusers"
 # Default model for image to image
 DEFAULT_IMG2IMG_MODEL = "stabilityai/stable-diffusion-xl-refiner-1.0"
 
+# ControlNet configuration
+USE_CONTROLNET = True  # Set to False to disable ControlNet in case of issues
+CONTROLNET_MODEL = "lllyasviel/sd-controlnet-depth"
+BASE_MODEL = "stable-diffusion-v1-5/stable-diffusion-v1-5"
+
 # Default prompts - used as placeholders in UI and defaults in API
 DEFAULT_TEXT2IMG_PROMPT = "A beautiful landscape with mountains and a lake"
 DEFAULT_IMG2IMG_PROMPT = "Transform this image with fantasy elements"
controlnet_pipeline.py
ADDED
@@ -0,0 +1,90 @@
+import torch
+import numpy as np
+from PIL import Image
+from transformers import pipeline
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
+from diffusers.utils import load_image
+import os
+import huggingface_hub
+import spaces
+import config
+
+class ControlNetPipeline:
+    def __init__(self):
+        """Initialize the ControlNet pipeline with lazy loading"""
+        self.depth_estimator = None
+        self.pipe = None
+        self.controlnet = None
+        self.is_initialized = False
+
+    @spaces.GPU
+    def initialize(self):
+        """Initialize the models with GPU acceleration"""
+        if self.is_initialized:
+            return
+
+        # Load depth estimator
+        self.depth_estimator = pipeline('depth-estimation')
+
+        # Load ControlNet model
+        self.controlnet = ControlNetModel.from_pretrained(
+            config.CONTROLNET_MODEL,
+            torch_dtype=torch.float16
+        )
+
+        # Load Stable Diffusion pipeline with ControlNet
+        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            config.BASE_MODEL,
+            controlnet=self.controlnet,
+            safety_checker=None,
+            torch_dtype=torch.float16
+        )
+
+        # Use more efficient scheduler
+        self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
+
+        # Enable memory optimizations
+        try:
+            self.pipe.enable_xformers_memory_efficient_attention()
+        except:
+            print("xformers not available, using default attention mechanism")
+
+        self.pipe.enable_model_cpu_offload()
+        self.is_initialized = True
+
+    @spaces.GPU
+    def process_image(self, image):
+        """Process the input image to generate depth map"""
+        # Ensure model is initialized
+        if not self.is_initialized:
+            self.initialize()
+
+        # Generate depth map
+        depth = self.depth_estimator(image)['depth']
+        depth_array = np.array(depth)
+        depth_array = depth_array[:, :, None]
+        depth_array = np.concatenate([depth_array, depth_array, depth_array], axis=2)
+        depth_image = Image.fromarray(depth_array)
+
+        return depth_image
+
+    @spaces.GPU
+    def generate(self, prompt, image, negative_prompt=None, guidance_scale=7.5, num_inference_steps=20):
+        """Generate an image using ControlNet with the provided prompt and input image"""
+        # Ensure model is initialized
+        if not self.is_initialized:
+            self.initialize()
+
+        # Process image to get depth map
+        depth_image = self.process_image(image)
+
+        # Generate the image
+        output = self.pipe(
+            prompt=prompt,
+            image=depth_image,
+            negative_prompt=negative_prompt,
+            guidance_scale=float(guidance_scale),
+            num_inference_steps=int(num_inference_steps)
+        )
+
+        return output.images[0]
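For standalone testing outside the API and the Gradio UI, the new class can be driven directly. A minimal sketch; the input path and prompt are placeholders, and the `spaces.GPU`-decorated methods expect a GPU-backed Spaces environment:

```python
from PIL import Image
from controlnet_pipeline import ControlNetPipeline

pipeline = ControlNetPipeline()       # models are loaded lazily on first use
source = Image.open("input.png")      # placeholder input image

result = pipeline.generate(
    prompt="A futuristic cityscape",  # placeholder prompt
    image=source,
    negative_prompt="blurry, low quality",
    guidance_scale=7.5,
    num_inference_steps=20,
)
result.save("controlnet_output.png")
```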
main.py
CHANGED
@@ -36,8 +36,13 @@ def main():
 
     # Check if HF_TOKEN is set
     if not config.HF_TOKEN:
-        print("
-        print("
+        print("\n")
+        print("*" * 80)
+        print("WARNING: HF_TOKEN environment variable is not set!")
+        print("* For local development: Create a .env file with HF_TOKEN=your_token")
+        print("* For Hugging Face Spaces: Add HF_TOKEN as a secret in your Space settings")
+        print("*" * 80)
+        print("\n")
 
     if args.mode == "all":
         # Run both API and UI in separate threads
requirements.txt
CHANGED
@@ -4,3 +4,9 @@ Pillow
 fastapi
 uvicorn
 python-dotenv
+torch
+transformers
+diffusers
+spaces
+xformers
+numpy
spaces_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "sdk": "gradio",
   "sdk_version": "3.50.2",
-  "app_file": "
+  "app_file": "main.py",
   "models": [
     {
       "model_name": "stabilityai/stable-diffusion-2-1",
@@ -10,6 +10,14 @@
     {
       "model_name": "lllyasviel/sd-controlnet-depth",
       "model_class": "diffusers"
+    },
+    {
+      "model_name": "stable-diffusion-v1-5/stable-diffusion-v1-5",
+      "model_class": "diffusers"
+    },
+    {
+      "model_name": "stabilityai/stable-diffusion-xl-refiner-1.0",
+      "model_class": "diffusers"
     }
   ],
   "resources": {