import spaces
import rembg
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoPipelineForImage2Image
import cv2
from transformers import pipeline
import numpy as np
from PIL import Image
import gradio as gr
# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")
def check_prompt(prompt):
    if prompt is None:
        raise gr.Error("Please enter a prompt!")
controlNet_normal = ControlNetModel.from_pretrained(
    "fusing/stable-diffusion-v1-5-controlnet-normal",
    torch_dtype=torch.float16
)
controlNet_depth = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth",
    torch_dtype=torch.float16
)
controlNet_MAP = {"Normal": controlNet_normal, "Depth": controlNet_depth}
# vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
# Generate an image from text, conditioned on a normal or depth map of the reference image
@spaces.GPU
def generate_txttoimg(prompt, control_image, controlnet):
    prompt += ", no background, side view, minimalist shot, single shoe, no legs, product photo"
    textpipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        controlnet=controlNet_MAP[controlnet],
        torch_dtype=torch.float16,
        safety_checker=None
    )
    textpipe.to("cuda")
    # Convert the reference image into the conditioning map the selected ControlNet expects
    if controlnet == "Normal":
        control_image = get_normal(control_image)
    elif controlnet == "Depth":
        control_image = get_depth(control_image)
    image = textpipe(prompt, image=control_image).images[0]
    # Strip the background so only the generated product remains
    image2 = rembg.remove(image)
    return image2
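# Illustrative call (the filename here is hypothetical; in the app the image
# comes from the Gradio UI):
# result = generate_txttoimg("a red running shoe", Image.open("ref_shoe.png"), "Depth")
# result.save("generated_shoe.png")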
@spaces.GPU
def generate_imgtoimg(prompt, init_image, strength=0.5):
    prompt += ", no background, side view, minimalist shot, single shoe, no legs, product photo"
    imagepipe = AutoPipelineForImage2Image.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0",
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True
    )
    imagepipe.to("cuda")  # run on the GPU in fp16, matching the txt2img pipeline
    # strength in [0, 1]: how far the output may drift from init_image (0 keeps it, 1 ignores it)
    image = imagepipe(prompt, image=init_image, strength=strength).images[0]
    image2 = rembg.remove(image)
    return image2
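# Illustrative call (hypothetical filename): lower strength stays closer to the
# uploaded image, higher strength lets the refiner deviate more.
# result = generate_imgtoimg("a leather boot", Image.open("sketch.png").convert("RGB"), strength=0.7)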
def get_normal(image):
    depth_estimator = pipeline("depth-estimation", model="Intel/dpt-hybrid-midas")
    image = depth_estimator(image)['predicted_depth'][0]
    image = image.numpy()
    # Normalize depth to [0, 1] so a fixed threshold can mask the background
    image_depth = image.copy()
    image_depth -= np.min(image_depth)
    image_depth /= np.max(image_depth)
    bg_threshold = 0.4
    # Sobel gradients of the depth map give the x/y components of the surface normals;
    # background pixels (below the depth threshold) are zeroed out
    x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    x[image_depth < bg_threshold] = 0
    y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    y[image_depth < bg_threshold] = 0
    z = np.ones_like(x) * np.pi * 2.0
    image = np.stack([x, y, z], axis=2)
    # Normalize each (x, y, z) vector to unit length, then map to 8-bit RGB
    image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
    image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
    normalimage = Image.fromarray(image)
    return normalimage
def get_depth(image):
    depth_estimator = pipeline('depth-estimation')
    image = depth_estimator(image)['depth']
    image = np.array(image)
    # Replicate the single depth channel to 3 channels, since ControlNet expects an RGB image
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    depthimage = Image.fromarray(image)
    return depthimage
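# Quick standalone check of the two preprocessors (hypothetical filename):
# ref = Image.open("shoe.jpg").convert("RGB")
# get_normal(ref).save("normal_map.png")  # RGB normal map from depth gradients
# get_depth(ref).save("depth_map.png")    # depth map replicated to 3 channels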
# def get_canny(image):
#     image = np.array(image)
#     low_threshold = 100
#     high_threshold = 200
#     image = cv2.Canny(image, low_threshold, high_threshold)
#     image = image[:, :, None]
#     image = np.concatenate([image, image, image], axis=2)
#     canny_image = Image.fromarray(image)
#     return canny_image
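
# Minimal sketch of how these functions could be wired into a Gradio UI; the
# component names and layout are illustrative assumptions, not necessarily the
# app's actual interface.
# with gr.Blocks() as demo:
#     prompt = gr.Textbox(label="Prompt")
#     ref = gr.Image(type="pil", label="Reference image")
#     mode = gr.Radio(["Normal", "Depth"], value="Depth", label="ControlNet type")
#     out = gr.Image(label="Result")
#     gr.Button("Generate").click(check_prompt, inputs=prompt).success(
#         generate_txttoimg, inputs=[prompt, ref, mode], outputs=out
#     )
# demo.launch()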