KarthikAI committed on
Commit 41ddd36 · verified · 1 Parent(s): 8d3155f

Update utils.py

Files changed (1):
  1. utils.py +34 -16
utils.py CHANGED
@@ -5,15 +5,23 @@ os.makedirs("/data/huggingface/hub", exist_ok=True)
 
 import torch
 from diffusers import StableDiffusionImg2ImgPipeline
+from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from PIL import Image
 
 
-# --- Place any download or path setup here ---
-MODEL_ID ="runwayml/stable-diffusion-v1-5"  # Can swap for custom path if using IP-Adapter
-ADAPTER_PATH = "/workspace/.cache/huggingface/ip_adapter/ip-adapter_sd15.bin"
-ADAPTER_DIR = "/workspace/.cache/huggingface/ip_adapter"
-DEVICE = "cpu"
-MODEL_CACHE = "/workspace/.cache/huggingface"
+# --- Place any download or path setup here --- old
+# MODEL_ID ="runwayml/stable-diffusion-v1-5"  # Can swap for custom path if using IP-Adapter
+# ADAPTER_PATH = "/workspace/.cache/huggingface/ip_adapter/ip-adapter_sd15.bin"
+# ADAPTER_DIR = "/workspace/.cache/huggingface/ip_adapter"
+# DEVICE = "cpu"
+# MODEL_CACHE = "/workspace/.cache/huggingface"
+
+# ---- SETTINGS ----
+MODEL_ID = "runwayml/stable-diffusion-v1-5"
+IPADAPTER_REPO = "h94/IP-Adapter"
+IPADAPTER_WEIGHT_NAME = "ip-adapter_sd15.bin"
+DEVICE = "cpu"  # Change to "cuda" if you have GPU
+CACHE_DIR = os.environ.get("HF_HOME", "/data/huggingface")
 
 # (Optional) Download IP-Adapter weights and patch pipeline if desired
 
@@ -21,24 +29,27 @@ MODEL_CACHE = "/workspace/.cache/huggingface"
 pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.float32,
-    cache_dir=MODEL_CACHE,
+    cache_dir=CACHE_DIR,
     # safety_checker=None,  # Disable for demo/testing; enable in prod
 ).to(DEVICE)
 
 pipe.load_ip_adapter(
-    pretrained_model_name_or_path_or_dict="h94/IP-Adapter",
+    pretrained_model_name_or_path_or_dict=IPADAPTER_REPO,
     subfolder="models",
-    weight_name="ip-adapter_sd15.bin"
+    weight_name=IPADAPTER_WEIGHT_NAME
 )
 
-# pipe.load_ip_adapter(
-#     pretrained_model_name_or_path_or_dict=ADAPTER_DIR,
-#     subfolder=".",  # The weights file is directly in ADAPTER_DIR
-#     weight_name="ip-adapter_sd15.bin"
-#     # Optionally: subfolder="models" if using the repo, not a direct path
-# )
+# Load vision encoder and processor for IP-Adapter embedding
+vision_encoder = CLIPVisionModelWithProjection.from_pretrained(
+    f"{IPADAPTER_REPO}/clip_vision_model",
+    cache_dir=CACHE_DIR,
+)
+image_processor = CLIPImageProcessor.from_pretrained(
+    f"{IPADAPTER_REPO}/clip_vision_model",
+    cache_dir=CACHE_DIR,
+)
 
-def generate_sticker(input_image, prompt):
+def generate_sticker(input_image: Image.Image, prompt: str):
     """
     Given a user image and a prompt, generates a sticker/emoji-style portrait.
     """
@@ -51,12 +62,19 @@ def generate_sticker(input_image, prompt):
     # ).to(DEVICE)
 
     # Preprocess the image (resize, etc)
+    face_img = input_image.convert("RGB").resize((224, 224))
+    inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        image_embeds = vision_encoder(**inputs).image_embeds
+
+    # 2. Prepare image for SD pipeline
    init_image = input_image.convert("RGB").resize((512, 512))
 
     # Run inference (low strength for identity preservation)
     result = pipe(
         prompt=prompt,
         image=init_image,
+        image_embeds=image_embeds,
         strength=0.65,
         guidance_scale=7.5,
         num_inference_steps=30
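Note on the new conditioning path: depending on the installed diffusers version, StableDiffusionImg2ImgPipeline's call may not accept an image_embeds keyword (IP-Adapter inputs go through ip_adapter_image or, in newer releases, ip_adapter_image_embeds), and from_pretrained(f"{IPADAPTER_REPO}/clip_vision_model") is unlikely to resolve, since a Hub repo id cannot carry a path suffix; the encoder in h94/IP-Adapter sits under the models/image_encoder subfolder. Below is a minimal sketch of the documented route, assuming diffusers >= 0.22, where load_ip_adapter() also wires in the image encoder so no manual CLIP step is needed; it is not the committed code.

# Sketch only, not the committed code: relies on load_ip_adapter() pulling
# the image encoder from h94/IP-Adapter's models/image_encoder automatically.
import torch
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float32,
).to("cpu")
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter_sd15.bin",
)

def generate_sticker(input_image: Image.Image, prompt: str) -> Image.Image:
    # Resize the reference photo for the img2img branch
    init_image = input_image.convert("RGB").resize((512, 512))
    result = pipe(
        prompt=prompt,
        image=init_image,
        ip_adapter_image=init_image,  # encoded internally by the pipeline's CLIP vision tower
        strength=0.65,
        guidance_scale=7.5,
        num_inference_steps=30,
    )
    return result.images[0]

Precomputed embeddings can be supplied via ip_adapter_image_embeds in recent releases, but the expected format (a list of tensors, with negative embeddings stacked in when classifier-free guidance is on) varies across versions, so passing the PIL image directly is the safer default here.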
 
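A quick smoke test for the updated function, assuming generate_sticker() returns the pipeline output's first image (the hunk above ends before the return statement, so that part is an assumption; the file names are placeholders):

# Hypothetical driver script; "face.jpg" and "sticker.png" are placeholders.
from PIL import Image
from utils import generate_sticker

source = Image.open("face.jpg")
sticker = generate_sticker(source, "cute chibi emoji sticker, thick white outline")
sticker.save("sticker.png")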