# Sticker_Diffusion / utils.py
import os
os.environ["HF_HOME"] = "/data/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface"
os.makedirs("/data/huggingface/hub", exist_ok=True)
# os.makedirs("/data/huggingface/clip_vision_model", exist_ok=True)
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection  # only needed if the manual-embedding path below is re-enabled
from PIL import Image
# ---- SETTINGS ----
MODEL_ID = "runwayml/stable-diffusion-v1-5"
IPADAPTER_REPO = "h94/IP-Adapter"
IPADAPTER_WEIGHT_NAME = "ip-adapter_sd15.bin"
DEVICE = "cpu" # Change to "cuda" if you have GPU
CACHE_DIR = os.environ.get("HF_HOME", "/data/huggingface")
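# A minimal sketch of picking device and dtype automatically instead of
# hard-coding them (assumes a CUDA-enabled torch build when a GPU is present;
# left commented so the explicit settings above stay authoritative):
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# TORCH_DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32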
# Base model and IP-Adapter weights are downloaded on first run.
# Load the model ONCE at startup, not per request!
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # torch.float16 is typical on GPU
    cache_dir=CACHE_DIR,
    # safety_checker=None,  # disable for demo/testing only; keep enabled in prod
).to(DEVICE)
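# Optional memory saver (a standard diffusers call; whether it helps depends
# on your hardware, so it is left commented as a suggestion):
# pipe.enable_attention_slicing()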
# Attach IP-Adapter so the pipeline can condition on a reference face image.
pipe.load_ip_adapter(
    pretrained_model_name_or_path_or_dict=IPADAPTER_REPO,
    subfolder="models",
    weight_name=IPADAPTER_WEIGHT_NAME,
)
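# Optional: tune how strongly the reference image steers generation.
# set_ip_adapter_scale is part of diffusers' IP-Adapter support; 0.0 ignores
# the reference image and 1.0 follows it closely. The 0.6 below is only an
# illustrative starting point, not a tested value for this project.
# pipe.set_ip_adapter_scale(0.6)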
# Optional manual-embedding path: load the vision encoder and processor to
# compute IP-Adapter image embeddings yourself (unused; the pipeline call
# below passes the reference image directly instead).
# vision_encoder = CLIPVisionModelWithProjection.from_pretrained(
#     "h94/IP-Adapter",              # repo_id (main IP-Adapter repo)
#     subfolder="clip_vision_model",  # subfolder within the repo
#     cache_dir=CACHE_DIR,
# )
# image_processor = CLIPImageProcessor.from_pretrained(
#     "h94/IP-Adapter",
#     subfolder="clip_vision_model",
#     cache_dir=CACHE_DIR,
# )
def generate_sticker(input_image: Image.Image, style: str):
    """
    Given a user image and a style name, generate a sheet of
    sticker/emoji-style portraits of that person.
    """
    # Preprocess the reference image (resize to SD 1.5's native resolution)
    face_img = input_image.convert("RGB").resize((512, 512))
    # Manual embedding path (only needed with the optional vision encoder above):
    # inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
    # with torch.no_grad():
    #     image_embeds = vision_encoder(**inputs).image_embeds
    # The same preprocessed image doubles as the img2img init image.
    init_image = face_img
    # With diffusers' built-in IP-Adapter support, the reference image can be
    # passed directly as ip_adapter_image; the pipeline computes the image
    # embeddings internally.
prompt = (f"A set of twelve {style}-style digital stickers"
"each with a different expression: laughing, angry, crying, sulking, thinking, sleepy, blowing a kiss, winking, surprised, happy, sad, and confused. "
"Each sticker has a bold black outline and a transparent background, in a playful, close-up cartoon style."
)
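    # Optional: a negative prompt often improves sticker-style outputs. This is
    # an illustrative suggestion, not part of the original configuration; if
    # enabled, pass it to the pipe() call below as negative_prompt=negative_prompt.
    # negative_prompt = "photorealistic, blurry, watermark, text, deformed"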
    # Run inference. strength=0.6 trades stylization against identity
    # preservation: lower values stay closer to the input photo.
    result = pipe(
        prompt=prompt,
        image=init_image,
        ip_adapter_image=face_img,
        strength=0.6,
        guidance_scale=8,
        num_inference_steps=40,
    )
    # Return the generated image (as PIL)
    return result.images[0]
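
# A minimal usage sketch. "face.png", "stickers.png", and the "Pixar" style
# value are placeholders for illustration, not paths/values from the project.
if __name__ == "__main__":
    user_photo = Image.open("face.png")
    sticker_sheet = generate_sticker(user_photo, style="Pixar")
    sticker_sheet.save("stickers.png")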