Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -40,11 +40,9 @@ class Args:
|
|
40 |
self.seed = 42
|
41 |
self.guidance_scale = 2.0
|
42 |
self.mixed_precision = None
|
43 |
-
|
44 |
-
# Determine the device to be used for computations (CUDA if available)
|
45 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
46 |
|
47 |
-
|
|
|
48 |
|
49 |
def pil_to_tensor(images):
|
50 |
images = np.array(images).astype(np.float32) / 255.0
|
@@ -56,44 +54,46 @@ def pil_to_tensor(images):
|
|
56 |
args = Args()
|
57 |
|
58 |
# Define the data type for model weights
|
59 |
-
weight_dtype = torch.
|
60 |
|
61 |
if args.seed is not None:
|
62 |
set_seed(args.seed)
|
63 |
|
64 |
|
65 |
# Load scheduler, tokenizer and models.
|
|
|
66 |
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
|
67 |
vae = AutoencoderKL.from_pretrained(
|
68 |
args.pretrained_model_name_or_path,
|
69 |
subfolder="vae",
|
70 |
-
torch_dtype=torch.
|
71 |
)
|
72 |
unet = UNet2DConditionModel.from_pretrained(
|
73 |
args.pretrained_model_name_or_path,
|
74 |
subfolder="unet",
|
75 |
-
torch_dtype=torch.
|
76 |
)
|
77 |
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
|
78 |
args.pretrained_model_name_or_path,
|
79 |
subfolder="image_encoder",
|
80 |
-
torch_dtype=torch.
|
81 |
)
|
82 |
unet_encoder = UNet2DConditionModel_ref.from_pretrained(
|
83 |
args.pretrained_model_name_or_path,
|
84 |
subfolder="unet_encoder",
|
85 |
-
torch_dtype=torch.
|
86 |
)
|
87 |
text_encoder_one = CLIPTextModel.from_pretrained(
|
88 |
args.pretrained_model_name_or_path,
|
89 |
subfolder="text_encoder",
|
90 |
-
torch_dtype=torch.
|
91 |
)
|
92 |
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
|
93 |
args.pretrained_model_name_or_path,
|
94 |
subfolder="text_encoder_2",
|
95 |
-
torch_dtype=torch.
|
96 |
)
|
|
|
97 |
tokenizer_one = AutoTokenizer.from_pretrained(
|
98 |
args.pretrained_model_name_or_path,
|
99 |
subfolder="tokenizer",
|
@@ -113,9 +113,8 @@ image_encoder.requires_grad_(False)
|
|
113 |
unet_encoder.requires_grad_(False)
|
114 |
text_encoder_one.requires_grad_(False)
|
115 |
text_encoder_two.requires_grad_(False)
|
116 |
-
unet_encoder.
|
117 |
-
|
118 |
-
unet_encoder.eval()
|
119 |
|
120 |
pipe = TryonPipeline.from_pretrained(
|
121 |
args.pretrained_model_name_or_path,
|
@@ -129,13 +128,11 @@ pipe = TryonPipeline.from_pretrained(
|
|
129 |
scheduler = noise_scheduler,
|
130 |
image_encoder=image_encoder,
|
131 |
unet_encoder = unet_encoder,
|
132 |
-
torch_dtype=torch.
|
133 |
-
)
|
134 |
-
|
135 |
-
# pipe.enable_model_cpu_offload()
|
136 |
-
# pipe.enable_vae_slicing()
|
137 |
-
# Function to generate the image based on inputs
|
138 |
def generate_virtual_try_on(person_image, cloth_image, mask_image, pose_image,cloth_des):
|
|
|
139 |
# Prepare the input images as tensors
|
140 |
person_image = person_image.resize((args.width, args.height))
|
141 |
cloth_image = cloth_image.resize((args.width, args.height))
|
|
|
40 |
self.seed = 42
|
41 |
self.guidance_scale = 2.0
|
42 |
self.mixed_precision = None
|
|
|
|
|
|
|
43 |
|
44 |
+
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
|
45 |
+
|
46 |
|
47 |
def pil_to_tensor(images):
|
48 |
images = np.array(images).astype(np.float32) / 255.0
|
|
|
54 |
args = Args()
|
55 |
|
56 |
# Define the data type for model weights
|
57 |
+
weight_dtype = torch.float16
|
58 |
|
59 |
if args.seed is not None:
|
60 |
set_seed(args.seed)
|
61 |
|
62 |
|
63 |
# Load scheduler, tokenizer and models.
|
64 |
+
|
65 |
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
|
66 |
vae = AutoencoderKL.from_pretrained(
|
67 |
args.pretrained_model_name_or_path,
|
68 |
subfolder="vae",
|
69 |
+
torch_dtype=torch.float16,
|
70 |
)
|
71 |
unet = UNet2DConditionModel.from_pretrained(
|
72 |
args.pretrained_model_name_or_path,
|
73 |
subfolder="unet",
|
74 |
+
torch_dtype=torch.float16,
|
75 |
)
|
76 |
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
|
77 |
args.pretrained_model_name_or_path,
|
78 |
subfolder="image_encoder",
|
79 |
+
torch_dtype=torch.float16,
|
80 |
)
|
81 |
unet_encoder = UNet2DConditionModel_ref.from_pretrained(
|
82 |
args.pretrained_model_name_or_path,
|
83 |
subfolder="unet_encoder",
|
84 |
+
torch_dtype=torch.float16,
|
85 |
)
|
86 |
text_encoder_one = CLIPTextModel.from_pretrained(
|
87 |
args.pretrained_model_name_or_path,
|
88 |
subfolder="text_encoder",
|
89 |
+
torch_dtype=torch.float16,
|
90 |
)
|
91 |
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
|
92 |
args.pretrained_model_name_or_path,
|
93 |
subfolder="text_encoder_2",
|
94 |
+
torch_dtype=torch.float16,
|
95 |
)
|
96 |
+
|
97 |
tokenizer_one = AutoTokenizer.from_pretrained(
|
98 |
args.pretrained_model_name_or_path,
|
99 |
subfolder="tokenizer",
|
|
|
113 |
unet_encoder.requires_grad_(False)
|
114 |
text_encoder_one.requires_grad_(False)
|
115 |
text_encoder_two.requires_grad_(False)
|
116 |
+
unet_encoder.requires_grad_(False)
|
117 |
+
|
|
|
118 |
|
119 |
pipe = TryonPipeline.from_pretrained(
|
120 |
args.pretrained_model_name_or_path,
|
|
|
128 |
scheduler = noise_scheduler,
|
129 |
image_encoder=image_encoder,
|
130 |
unet_encoder = unet_encoder,
|
131 |
+
torch_dtype=torch.float16,
|
132 |
+
)
|
133 |
+
|
|
|
|
|
|
|
134 |
def generate_virtual_try_on(person_image, cloth_image, mask_image, pose_image,cloth_des):
|
135 |
+
pipe.to(device)
|
136 |
# Prepare the input images as tensors
|
137 |
person_image = person_image.resize((args.width, args.height))
|
138 |
cloth_image = cloth_image.resize((args.width, args.height))
|