KarthikAI committed on
Commit 41ddd36 · verified · 1 Parent(s): 8d3155f

Update utils.py

Files changed (1):
  1. utils.py +34 -16
utils.py CHANGED
@@ -5,15 +5,23 @@ os.makedirs("/data/huggingface/hub", exist_ok=True)
 
 import torch
 from diffusers import StableDiffusionImg2ImgPipeline
+from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from PIL import Image
 
 
-# --- Place any download or path setup here ---
-MODEL_ID ="runwayml/stable-diffusion-v1-5"  # Can swap for custom path if using IP-Adapter
-ADAPTER_PATH = "/workspace/.cache/huggingface/ip_adapter/ip-adapter_sd15.bin"
-ADAPTER_DIR = "/workspace/.cache/huggingface/ip_adapter"
-DEVICE = "cpu"
-MODEL_CACHE = "/workspace/.cache/huggingface"
+# --- Place any download or path setup here --- old
+# MODEL_ID ="runwayml/stable-diffusion-v1-5"  # Can swap for custom path if using IP-Adapter
+# ADAPTER_PATH = "/workspace/.cache/huggingface/ip_adapter/ip-adapter_sd15.bin"
+# ADAPTER_DIR = "/workspace/.cache/huggingface/ip_adapter"
+# DEVICE = "cpu"
+# MODEL_CACHE = "/workspace/.cache/huggingface"
+
+# ---- SETTINGS ----
+MODEL_ID = "runwayml/stable-diffusion-v1-5"
+IPADAPTER_REPO = "h94/IP-Adapter"
+IPADAPTER_WEIGHT_NAME = "ip-adapter_sd15.bin"
+DEVICE = "cpu"  # Change to "cuda" if you have GPU
+CACHE_DIR = os.environ.get("HF_HOME", "/data/huggingface")
 
 # (Optional) Download IP-Adapter weights and patch pipeline if desired
 
@@ -21,24 +29,27 @@ MODEL_CACHE = "/workspace/.cache/huggingface"
 pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.float32,
-    cache_dir=MODEL_CACHE,
+    cache_dir=CACHE_DIR,
     # safety_checker=None,  # Disable for demo/testing; enable in prod
 ).to(DEVICE)
 
 pipe.load_ip_adapter(
-    pretrained_model_name_or_path_or_dict="h94/IP-Adapter",
+    pretrained_model_name_or_path_or_dict=IPADAPTER_REPO,
     subfolder="models",
-    weight_name="ip-adapter_sd15.bin"
+    weight_name=IPADAPTER_WEIGHT_NAME
 )
 
-# pipe.load_ip_adapter(
-#     pretrained_model_name_or_path_or_dict=ADAPTER_DIR,
-#     subfolder=".",  # The weights file is directly in ADAPTER_DIR
-#     weight_name="ip-adapter_sd15.bin"
-#     # Optionally: subfolder="models" if using the repo, not a direct path
-# )
+# Load vision encoder and processor for IP-Adapter embedding
+vision_encoder = CLIPVisionModelWithProjection.from_pretrained(
+    f"{IPADAPTER_REPO}/clip_vision_model",
+    cache_dir=CACHE_DIR,
+)
+image_processor = CLIPImageProcessor.from_pretrained(
+    f"{IPADAPTER_REPO}/clip_vision_model",
+    cache_dir=CACHE_DIR,
+)
 
-def generate_sticker(input_image, prompt):
+def generate_sticker(input_image: Image.Image, prompt: str):
     """
     Given a user image and a prompt, generates a sticker/emoji-style portrait.
     """
@@ -51,12 +62,19 @@ def generate_sticker(input_image, prompt):
     # ).to(DEVICE)
 
     # Preprocess the image (resize, etc)
+    face_img = input_image.convert("RGB").resize((224, 224))
+    inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        image_embeds = vision_encoder(**inputs).image_embeds
+
+    # 2. Prepare image for SD pipeline
    init_image = input_image.convert("RGB").resize((512, 512))
 
     # Run inference (low strength for identity preservation)
     result = pipe(
         prompt=prompt,
         image=init_image,
+        image_embeds=image_embeds,
         strength=0.65,
         guidance_scale=7.5,
         num_inference_steps=30
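Note on the new conditioning path: depending on the installed diffusers version, StableDiffusionImg2ImgPipeline's call may not accept an image_embeds keyword (IP-Adapter inputs go through ip_adapter_image or, in newer releases, ip_adapter_image_embeds), and from_pretrained(f"{IPADAPTER_REPO}/clip_vision_model") is unlikely to resolve, since a Hub repo id cannot carry a path suffix; the encoder in h94/IP-Adapter sits under the models/image_encoder subfolder. Below is a minimal sketch of the documented route, assuming diffusers >= 0.22, where load_ip_adapter() also wires in the image encoder so no manual CLIP step is needed; it is not the committed code.

# Sketch only, not the committed code: relies on load_ip_adapter() pulling
# the image encoder from h94/IP-Adapter's models/image_encoder automatically.
import torch
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float32,
).to("cpu")
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter_sd15.bin",
)

def generate_sticker(input_image: Image.Image, prompt: str) -> Image.Image:
    # Resize the reference photo for the img2img branch
    init_image = input_image.convert("RGB").resize((512, 512))
    result = pipe(
        prompt=prompt,
        image=init_image,
        ip_adapter_image=init_image,  # encoded internally by the pipeline's CLIP vision tower
        strength=0.65,
        guidance_scale=7.5,
        num_inference_steps=30,
    )
    return result.images[0]

Precomputed embeddings can be supplied via ip_adapter_image_embeds in recent releases, but the expected format (a list of tensors, with negative embeddings stacked in when classifier-free guidance is on) varies across versions, so passing the PIL image directly is the safer default here.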
 
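A quick smoke test for the updated function, assuming generate_sticker() returns the pipeline output's first image (the hunk above ends before the return statement, so that part is an assumption; the file names are placeholders):

# Hypothetical driver script; "face.jpg" and "sticker.png" are placeholders.
from PIL import Image
from utils import generate_sticker

source = Image.open("face.jpg")
sticker = generate_sticker(source, "cute chibi emoji sticker, thick white outline")
sticker.save("sticker.png")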