Yaron Koresh committed on
Commit
e7149a6
·
verified ·
1 Parent(s): 093702c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -60
app.py CHANGED
@@ -24,8 +24,7 @@ from lxml.html import fromstring
24
  from diffusers.utils import export_to_video, load_image
25
  from huggingface_hub import hf_hub_download
26
  from safetensors.torch import load_file, save_file
27
- from diffusers import StableDiffusionPipeline, CogVideoXImageToVideoPipeline, DDIMScheduler #, AnimateDiffPipeline
28
- from diffusers.models import AutoencoderKL #, MotionAdapter
29
  from PIL import Image, ImageDraw, ImageFont
30
 
31
  # logging
@@ -48,23 +47,20 @@ else:
48
  device = "cpu"
49
  dtype = torch.float16
50
 
51
- #base = "emilianJR/epiCRealism"
52
- base = "SG161222/Realistic_Vision_V5.1_noVAE"
53
- vae_id = "stabilityai/sd-vae-ft-mse"
54
- #motion_adapter = "guoyww/animatediff-motion-adapter-v1-5-3"
55
 
56
  # variable data
57
 
58
- last_motion=""
59
 
60
  # precision data
61
 
62
  seq=512
63
- fast=False
64
  fps=20
65
  width=768
66
  height=768
67
- step=40
 
68
  accu=7
69
 
70
  # ui data
@@ -112,30 +108,14 @@ function custom(){
112
  # torch pipes
113
 
114
  image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
115
- #adapter = MotionAdapter.from_pretrained(motion_adapter, torch_dtype=dtype, safety_checker=None).to(device)
116
- vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
117
- image_pipe.vae = vae
118
-
119
- scheduler = DDIMScheduler.from_pretrained(
120
- base,
121
- subfolder="scheduler",
122
- clip_sample=False,
123
- timestep_spacing="linspace",
124
- beta_schedule="linear",
125
- steps_offset=1,
126
- )
127
-
128
  video_pipe = CogVideoXImageToVideoPipeline.from_pretrained(
129
  "THUDM/CogVideoX-5b-I2V",
130
  torch_dtype=torch.bfloat16
131
  ).to(device)
132
- video_pipe.scheduler = scheduler
133
 
134
  video_pipe.vae.enable_tiling()
135
  video_pipe.vae.enable_slicing()
136
- #pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
137
  video_pipe.enable_model_cpu_offload()
138
- #pipe.enable_free_init(method="butterworth", use_fast_sampling=fast)
139
 
140
  # functionality
141
 
@@ -199,7 +179,7 @@ def pipe_generate(img,p1,p2,motion,time,title):
199
  width=width,
200
  guidance_scale=accu,
201
  num_images_per_prompt=1,
202
- num_inference_steps=step,
203
  max_sequence_length=seq,
204
  need_safetycheck=False,
205
  generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
@@ -217,20 +197,11 @@ def pipe_generate(img,p1,p2,motion,time,title):
217
  if time == 0.0:
218
  return img
219
 
220
- if last_motion != motion:
221
- if last_motion != "":
222
- pipe.unload_lora_weights()
223
- if motion != "":
224
- pipe.load_lora_weights(motion, adapter_name="motion")
225
- pipe.fuse_lora()
226
- pipe.set_adapters("motion", [0.7])
227
- last_motion = motion
228
-
229
  return video_pipe(
230
  prompt=p1,
231
  negative_prompt=p2,
232
  image=img,
233
- num_inference_steps=step,
234
  guidance_scale=accu,
235
  num_videos_per_prompt=1,
236
  num_frames=(fps*time),
@@ -245,14 +216,14 @@ def handle_generate(*_inp):
245
  inp[2] = translate(inp[2],"english")
246
 
247
  if inp[2] != "":
248
- inp[2] = ", related to: " + inp[2] + "."
249
 
250
- inp[2] = f"The content which is faked, errored, unreal, off topic, pixelated, deformed, and semi-realistic, cgi, 3d, sketch, cartoon, drawing, anime, cropped, out of frame, low quality, textual, jpeg artifacts, ugly, duplicated, weird, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutations, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck content{inp[2]}"
251
 
252
  if inp[1] != "":
253
- inp[1] = ", related to: " + inp[1] + "."
254
 
255
- inp[1] = f'The content which is photographed, realistic, true, genuine, dynamic poze, authentic, deep field, reasonable, natural, best quality, focused, highly detailed content{inp[1]}'
256
 
257
  print(f"""
258
 
@@ -305,27 +276,9 @@ def ui():
305
  maximum=600.0,
306
  value=0.0,
307
  step=5.0,
308
- label="MP4/PNG Duration (0s = PNG)"
309
  )
310
- motion = gr.Dropdown(
311
- label='GIF camera movement',
312
- show_label=True,
313
- container=False,
314
- choices=[
315
- ("(No Effect)", ""),
316
- ("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
317
- ("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
318
- ("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
319
- ("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
320
- ("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
321
- ("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
322
- ("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
323
- ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
324
- ],
325
- value="",
326
- interactive=True
327
- )
328
- with gr.Row(elem_id="col-container"):
329
  with gr.Column():
330
  img = gr.Image(label="Upload photo",show_label=True,container=False,type="pil")
331
  with gr.Column():
 
24
  from diffusers.utils import export_to_video, load_image
25
  from huggingface_hub import hf_hub_download
26
  from safetensors.torch import load_file, save_file
27
+ from diffusers import StableDiffusionPipeline, CogVideoXImageToVideoPipeline
 
28
  from PIL import Image, ImageDraw, ImageFont
29
 
30
  # logging
 
47
  device = "cpu"
48
  dtype = torch.float16
49
 
50
+ base = "emilianJR/epiCRealism"
 
 
 
51
 
52
  # variable data
53
 
54
+
55
 
56
  # precision data
57
 
58
  seq=512
 
59
  fps=20
60
  width=768
61
  height=768
62
+ image_steps=40
63
+ video_steps=20
64
  accu=7
65
 
66
  # ui data
 
108
  # torch pipes
109
 
110
  image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  video_pipe = CogVideoXImageToVideoPipeline.from_pretrained(
112
  "THUDM/CogVideoX-5b-I2V",
113
  torch_dtype=torch.bfloat16
114
  ).to(device)
 
115
 
116
  video_pipe.vae.enable_tiling()
117
  video_pipe.vae.enable_slicing()
 
118
  video_pipe.enable_model_cpu_offload()
 
119
 
120
  # functionality
121
 
 
179
  width=width,
180
  guidance_scale=accu,
181
  num_images_per_prompt=1,
182
+ num_inference_steps=image_steps,
183
  max_sequence_length=seq,
184
  need_safetycheck=False,
185
  generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
 
197
  if time == 0.0:
198
  return img
199
 
 
 
 
 
 
 
 
 
 
200
  return video_pipe(
201
  prompt=p1,
202
  negative_prompt=p2,
203
  image=img,
204
+ num_inference_steps=video_steps,
205
  guidance_scale=accu,
206
  num_videos_per_prompt=1,
207
  num_frames=(fps*time),
 
216
  inp[2] = translate(inp[2],"english")
217
 
218
  if inp[2] != "":
219
+ inp[2] = ", related to: " + inp[2]
220
 
221
+ inp[2] = f"faked, errored, unreal, off topic, pixelated, deformed, and semi-realistic, cgi, 3d, sketch, cartoon, drawing, anime, cropped, out of frame, low quality, textual, jpeg artifacts, ugly, duplicated, weird, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutations, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck{inp[2]}"
222
 
223
  if inp[1] != "":
224
+ inp[1] = ", related to: " + inp[1]
225
 
226
+ inp[1] = f'photographed, realistic, true, genuine, dynamic poze, authentic, deep field, reasonable, natural, best quality, focused, highly detailed{inp[1]}'
227
 
228
  print(f"""
229
 
 
276
  maximum=600.0,
277
  value=0.0,
278
  step=5.0,
279
+ label="Duration (0s = PNG)"
280
  )
281
+ with gr.Row(elem_id="col-container"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  with gr.Column():
283
  img = gr.Image(label="Upload photo",show_label=True,container=False,type="pil")
284
  with gr.Column():