KarthikAI committed
Commit 5e6959c · verified · Parent(s): 12c11e9

Update utils.py
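
Disables the standalone CLIP vision encoder and image processor previously loaded for IP-Adapter embeddings; the reference-image embedding is now requested from the pipeline itself via prepare_ip_adapter_image_embeds.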

Files changed (1):
  1. utils.py +18 -15
utils.py CHANGED
@@ -2,7 +2,7 @@ import os
 os.environ["HF_HOME"] = "/data/huggingface"
 os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface"
 os.makedirs("/data/huggingface/hub", exist_ok=True)
-os.makedirs("/data/huggingface/clip_vision_model", exist_ok=True)
+# os.makedirs("/data/huggingface/clip_vision_model", exist_ok=True)
 
 import torch
 from diffusers import StableDiffusionImg2ImgPipeline
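
Note on this hunk: the os.environ assignments only take effect if they run before transformers/diffusers are imported, which is why they sit above the imports. Recent transformers releases also deprecate TRANSFORMERS_CACHE in favor of HF_HOME, so setting both, as done here, keeps older and newer versions pointed at the same cache directory.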
@@ -40,18 +40,18 @@ pipe.load_ip_adapter(
     weight_name=IPADAPTER_WEIGHT_NAME
 )
 
-# Load vision encoder and processor for IP-Adapter embedding
-vision_encoder = CLIPVisionModelWithProjection.from_pretrained(
-    "h94/IP-Adapter",               # repo_id (main IP-Adapter repo)
-    subfolder="clip_vision_model",  # subfolder within the repo!
-    cache_dir=CACHE_DIR
-)
+# # Load vision encoder and processor for IP-Adapter embedding
+# vision_encoder = CLIPVisionModelWithProjection.from_pretrained(
+#     "h94/IP-Adapter",               # repo_id (main IP-Adapter repo)
+#     subfolder="clip_vision_model",  # subfolder within the repo!
+#     cache_dir=CACHE_DIR
+# )
 
-image_processor = CLIPImageProcessor.from_pretrained(
-    "h94/IP-Adapter",
-    subfolder="clip_vision_model",
-    cache_dir=CACHE_DIR
-)
+# image_processor = CLIPImageProcessor.from_pretrained(
+#     "h94/IP-Adapter",
+#     subfolder="clip_vision_model",
+#     cache_dir=CACHE_DIR
+# )
 
 def generate_sticker(input_image: Image.Image, prompt: str):
     """
@@ -67,13 +67,16 @@ def generate_sticker(input_image: Image.Image, prompt: str):
 
     # Preprocess the image (resize, etc.)
     face_img = input_image.convert("RGB").resize((224, 224))
-    inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
-    with torch.no_grad():
-        image_embeds = vision_encoder(**inputs).image_embeds
+    # inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
+    # with torch.no_grad():
+    #     image_embeds = vision_encoder(**inputs).image_embeds
 
     # 2. Prepare image for SD pipeline
     init_image = input_image.convert("RGB").resize((512, 512))
 
+    # IP-Adapter expects the reference image via image_embeds, which is produced by this function:
+    image_embeds = pipe.prepare_ip_adapter_image_embeds(face_img)
+
     # Run inference (low strength for identity preservation)
     result = pipe(
         prompt=prompt,
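
One caveat on the new call: in recent diffusers releases, prepare_ip_adapter_image_embeds also expects device, num_images_per_prompt, and do_classifier_free_guidance arguments (plus an optional precomputed-embeds argument), so the single-argument form added here may need adjusting. The commonly documented alternative is to pass the reference image straight to the pipeline call and let it compute the embeddings internally. A hedged sketch of generate_sticker on that assumption, with placeholder strength/guidance values not taken from this diff:

from PIL import Image

def generate_sticker(input_image: Image.Image, prompt: str):
    # Assumes the `pipe` with a loaded IP-Adapter from the sketch above.
    face_img = input_image.convert("RGB").resize((224, 224))
    init_image = input_image.convert("RGB").resize((512, 512))

    # With an IP-Adapter loaded, the pipeline encodes ip_adapter_image
    # internally; no manual image_embeds handling is needed.
    result = pipe(
        prompt=prompt,
        image=init_image,
        ip_adapter_image=face_img,
        strength=0.4,        # placeholder: low strength preserves identity
        guidance_scale=7.5,  # placeholder
    )
    return result.images[0]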
 