Spaces:
Running
Running
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -24,8 +24,7 @@ from lxml.html import fromstring
|
|
24 |
from diffusers.utils import export_to_video, load_image
|
25 |
from huggingface_hub import hf_hub_download
|
26 |
from safetensors.torch import load_file, save_file
|
27 |
-
from diffusers import StableDiffusionPipeline, CogVideoXImageToVideoPipeline
|
28 |
-
from diffusers.models import AutoencoderKL #, MotionAdapter
|
29 |
from PIL import Image, ImageDraw, ImageFont
|
30 |
|
31 |
# logging
|
@@ -48,23 +47,20 @@ else:
|
|
48 |
device = "cpu"
|
49 |
dtype = torch.float16
|
50 |
|
51 |
-
|
52 |
-
base = "SG161222/Realistic_Vision_V5.1_noVAE"
|
53 |
-
vae_id = "stabilityai/sd-vae-ft-mse"
|
54 |
-
#motion_adapter = "guoyww/animatediff-motion-adapter-v1-5-3"
|
55 |
|
56 |
# variable data
|
57 |
|
58 |
-
|
59 |
|
60 |
# precision data
|
61 |
|
62 |
seq=512
|
63 |
-
fast=False
|
64 |
fps=20
|
65 |
width=768
|
66 |
height=768
|
67 |
-
|
|
|
68 |
accu=7
|
69 |
|
70 |
# ui data
|
@@ -112,30 +108,14 @@ function custom(){
|
|
112 |
# torch pipes
|
113 |
|
114 |
image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
|
115 |
-
#adapter = MotionAdapter.from_pretrained(motion_adapter, torch_dtype=dtype, safety_checker=None).to(device)
|
116 |
-
vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
|
117 |
-
image_pipe.vae = vae
|
118 |
-
|
119 |
-
scheduler = DDIMScheduler.from_pretrained(
|
120 |
-
base,
|
121 |
-
subfolder="scheduler",
|
122 |
-
clip_sample=False,
|
123 |
-
timestep_spacing="linspace",
|
124 |
-
beta_schedule="linear",
|
125 |
-
steps_offset=1,
|
126 |
-
)
|
127 |
-
|
128 |
video_pipe = CogVideoXImageToVideoPipeline.from_pretrained(
|
129 |
"THUDM/CogVideoX-5b-I2V",
|
130 |
torch_dtype=torch.bfloat16
|
131 |
).to(device)
|
132 |
-
video_pipe.scheduler = scheduler
|
133 |
|
134 |
video_pipe.vae.enable_tiling()
|
135 |
video_pipe.vae.enable_slicing()
|
136 |
-
#pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
|
137 |
video_pipe.enable_model_cpu_offload()
|
138 |
-
#pipe.enable_free_init(method="butterworth", use_fast_sampling=fast)
|
139 |
|
140 |
# functionality
|
141 |
|
@@ -199,7 +179,7 @@ def pipe_generate(img,p1,p2,motion,time,title):
|
|
199 |
width=width,
|
200 |
guidance_scale=accu,
|
201 |
num_images_per_prompt=1,
|
202 |
-
num_inference_steps=
|
203 |
max_sequence_length=seq,
|
204 |
need_safetycheck=False,
|
205 |
generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
|
@@ -217,20 +197,11 @@ def pipe_generate(img,p1,p2,motion,time,title):
|
|
217 |
if time == 0.0:
|
218 |
return img
|
219 |
|
220 |
-
if last_motion != motion:
|
221 |
-
if last_motion != "":
|
222 |
-
pipe.unload_lora_weights()
|
223 |
-
if motion != "":
|
224 |
-
pipe.load_lora_weights(motion, adapter_name="motion")
|
225 |
-
pipe.fuse_lora()
|
226 |
-
pipe.set_adapters("motion", [0.7])
|
227 |
-
last_motion = motion
|
228 |
-
|
229 |
return video_pipe(
|
230 |
prompt=p1,
|
231 |
negative_prompt=p2,
|
232 |
image=img,
|
233 |
-
num_inference_steps=
|
234 |
guidance_scale=accu,
|
235 |
num_videos_per_prompt=1,
|
236 |
num_frames=(fps*time),
|
@@ -245,14 +216,14 @@ def handle_generate(*_inp):
|
|
245 |
inp[2] = translate(inp[2],"english")
|
246 |
|
247 |
if inp[2] != "":
|
248 |
-
inp[2] = ", related to: " + inp[2]
|
249 |
|
250 |
-
inp[2] = f"
|
251 |
|
252 |
if inp[1] != "":
|
253 |
-
inp[1] = ", related to: " + inp[1]
|
254 |
|
255 |
-
inp[1] = f'
|
256 |
|
257 |
print(f"""
|
258 |
|
@@ -305,27 +276,9 @@ def ui():
|
|
305 |
maximum=600.0,
|
306 |
value=0.0,
|
307 |
step=5.0,
|
308 |
-
label="
|
309 |
)
|
310 |
-
|
311 |
-
label='GIF camera movement',
|
312 |
-
show_label=True,
|
313 |
-
container=False,
|
314 |
-
choices=[
|
315 |
-
("(No Effect)", ""),
|
316 |
-
("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
|
317 |
-
("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
|
318 |
-
("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
|
319 |
-
("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
|
320 |
-
("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
|
321 |
-
("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
|
322 |
-
("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
|
323 |
-
("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
|
324 |
-
],
|
325 |
-
value="",
|
326 |
-
interactive=True
|
327 |
-
)
|
328 |
-
with gr.Row(elem_id="col-container"):
|
329 |
with gr.Column():
|
330 |
img = gr.Image(label="Upload photo",show_label=True,container=False,type="pil")
|
331 |
with gr.Column():
|
|
|
24 |
from diffusers.utils import export_to_video, load_image
|
25 |
from huggingface_hub import hf_hub_download
|
26 |
from safetensors.torch import load_file, save_file
|
27 |
+
from diffusers import StableDiffusionPipeline, CogVideoXImageToVideoPipeline
|
|
|
28 |
from PIL import Image, ImageDraw, ImageFont
|
29 |
|
30 |
# logging
|
|
|
47 |
device = "cpu"
|
48 |
dtype = torch.float16
|
49 |
|
50 |
+
base = "emilianJR/epiCRealism"
|
|
|
|
|
|
|
51 |
|
52 |
# variable data
|
53 |
|
54 |
+
|
55 |
|
56 |
# precision data
|
57 |
|
58 |
seq=512
|
|
|
59 |
fps=20
|
60 |
width=768
|
61 |
height=768
|
62 |
+
image_steps=40
|
63 |
+
video_steps=20
|
64 |
accu=7
|
65 |
|
66 |
# ui data
|
|
|
108 |
# torch pipes
|
109 |
|
110 |
image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
video_pipe = CogVideoXImageToVideoPipeline.from_pretrained(
|
112 |
"THUDM/CogVideoX-5b-I2V",
|
113 |
torch_dtype=torch.bfloat16
|
114 |
).to(device)
|
|
|
115 |
|
116 |
video_pipe.vae.enable_tiling()
|
117 |
video_pipe.vae.enable_slicing()
|
|
|
118 |
video_pipe.enable_model_cpu_offload()
|
|
|
119 |
|
120 |
# functionality
|
121 |
|
|
|
179 |
width=width,
|
180 |
guidance_scale=accu,
|
181 |
num_images_per_prompt=1,
|
182 |
+
num_inference_steps=image_steps,
|
183 |
max_sequence_length=seq,
|
184 |
need_safetycheck=False,
|
185 |
generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
|
|
|
197 |
if time == 0.0:
|
198 |
return img
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
return video_pipe(
|
201 |
prompt=p1,
|
202 |
negative_prompt=p2,
|
203 |
image=img,
|
204 |
+
num_inference_steps=video_steps,
|
205 |
guidance_scale=accu,
|
206 |
num_videos_per_prompt=1,
|
207 |
num_frames=(fps*time),
|
|
|
216 |
inp[2] = translate(inp[2],"english")
|
217 |
|
218 |
if inp[2] != "":
|
219 |
+
inp[2] = ", related to: " + inp[2]
|
220 |
|
221 |
+
inp[2] = f"faked, errored, unreal, off topic, pixelated, deformed, and semi-realistic, cgi, 3d, sketch, cartoon, drawing, anime, cropped, out of frame, low quality, textual, jpeg artifacts, ugly, duplicated, weird, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutations, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck{inp[2]}"
|
222 |
|
223 |
if inp[1] != "":
|
224 |
+
inp[1] = ", related to: " + inp[1]
|
225 |
|
226 |
+
inp[1] = f'photographed, realistic, true, genuine, dynamic poze, authentic, deep field, reasonable, natural, best quality, focused, highly detailed{inp[1]}'
|
227 |
|
228 |
print(f"""
|
229 |
|
|
|
276 |
maximum=600.0,
|
277 |
value=0.0,
|
278 |
step=5.0,
|
279 |
+
label="Duration (0s = PNG)"
|
280 |
)
|
281 |
+
with gr.Row(elem_id="col-container"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
with gr.Column():
|
283 |
img = gr.Image(label="Upload photo",show_label=True,container=False,type="pil")
|
284 |
with gr.Column():
|