Kokoro-API-3

Running

App Files Files Community

Yaron Koresh committed on Jan 16

Commit

169516c

verified ·

1 Parent(s): f031971

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -43

app.py CHANGED Viewed

@@ -21,13 +21,13 @@ import torch
 import gradio as gr
 from numpy import asarray as array
 from lxml.html import fromstring
-from diffusers.utils import export_to_gif, load_image
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file, save_file
-from diffusers import StableDiffusionPipeline
-from diffusers import AnimateDiffPipeline, DDIMScheduler
-from diffusers.models import AutoencoderKL, MotionAdapter
-from diffusers.schedulers import DPMSolverMultistepScheduler
 from PIL import Image, ImageDraw, ImageFont
 # logging
@@ -53,7 +53,7 @@ else:
 #base = "emilianJR/epiCRealism"
 base = "SG161222/Realistic_Vision_V5.1_noVAE"
 vae_id = "stabilityai/sd-vae-ft-mse"
-motion_adapter = "guoyww/animatediff-motion-adapter-v1-5-3"
 # variable data
@@ -114,18 +114,11 @@ function custom(){
 # torch pipes
 image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
-adapter = MotionAdapter.from_pretrained(motion_adapter, torch_dtype=dtype, safety_checker=None).to(device)
 vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
-scheduler = DPMSolverMultistepScheduler.from_pretrained(
-    base,
-    subfolder="scheduler",
-    beta_schedule="linear",
-    algorithm_type="dpmsolver++",
-    use_karras_sigmas=True
-)
-scheduler2 = DDIMScheduler.from_pretrained(
     base,
     subfolder="scheduler",
     clip_sample=False,
@@ -134,19 +127,18 @@ scheduler2 = DDIMScheduler.from_pretrained(
     steps_offset=1,
 )
-pipe = AnimateDiffPipeline.from_pretrained(
-    base,
-    motion_adapter=adapter,
-    torch_dtype=torch.float16
 ).to(device)
-pipe.scheduler = scheduler2
-pipe.vae = vae
-pipe.enable_vae_slicing()
-pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
-pipe.enable_model_cpu_offload()
-pipe.enable_free_init(method="butterworth", use_fast_sampling=fast)
 # functionality
@@ -216,6 +208,15 @@ def pipe_generate(img,p1,p2,motion,time,title):
             generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
         ).images[0]
     if time == 0.0:
         return img
@@ -227,13 +228,14 @@ def pipe_generate(img,p1,p2,motion,time,title):
             pipe.fuse_lora()
             pipe.set_adapters("motion", [0.7])
         last_motion = motion
-    return pipe(
         prompt=p1,
         negative_prompt=p2,
-        ip_adapter_image=load_image(img),
         num_inference_steps=step,
         guidance_scale=accu,
         num_frames=(fps*time),
         need_safetycheck=False,
         generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
@@ -266,27 +268,18 @@ def handle_generate(*_inp):
     pipe_out = pipe_generate(*inp)
-    if inp[5] != "":
-        draw = ImageDraw.Draw(pipe_out)
-        textheight=84
-        font = ImageFont.truetype(r"OpenSans-Bold.ttf", textheight)
-        textwidth = draw.textlength(inp[5],font)
-        x = (width - textwidth) // 2
-        y = (height - textheight) // 2
-        draw.text((x, y), inp[5],font=font)
-    name = generate_random_string(12) + ( ".png" if time == 0 else ".gif" )
     if inp[4] == 0.0:
         pipe_out.save(name)
     else:
-        export_to_gif(pipe_out,name,fps=fps)
     return name
 def ui():
     global result
     with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
         gr.Markdown(f"""
-            # MULTI-LANGUAGE GIF/PNG CREATOR
         """)
         with gr.Row(elem_id="col-container"):
             with gr.Column():
@@ -318,7 +311,7 @@ def ui():
                         maximum=600.0,
                         value=0.0,
                         step=5.0,
-                        label="GIF/PNG Duration (0s = PNG)"
                     )
                 with gr.Row():
                         motion = gr.Dropdown(

 import gradio as gr
 from numpy import asarray as array
 from lxml.html import fromstring
+from diffusers.utils import export_to_video, load_image
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file, save_file
+from diffusers import StableDiffusionPipeline, CogVideoXImageToVideoPipeline
+#from diffusers import AnimateDiffPipeline, DDIMScheduler
+#from diffusers.models import AutoencoderKL, MotionAdapter
+#from diffusers.schedulers import DPMSolverMultistepScheduler
 from PIL import Image, ImageDraw, ImageFont
 # logging
 #base = "emilianJR/epiCRealism"
 base = "SG161222/Realistic_Vision_V5.1_noVAE"
 vae_id = "stabilityai/sd-vae-ft-mse"
+#motion_adapter = "guoyww/animatediff-motion-adapter-v1-5-3"
 # variable data
 # torch pipes
 image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
+#adapter = MotionAdapter.from_pretrained(motion_adapter, torch_dtype=dtype, safety_checker=None).to(device)
 vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
+image_pipe.vae = vae
+scheduler = DDIMScheduler.from_pretrained(
     base,
     subfolder="scheduler",
     clip_sample=False,
     steps_offset=1,
 )
+video_pipe = CogVideoXImageToVideoPipeline.from_pretrained(
+    "THUDM/CogVideoX-5b-I2V",
+    torch_dtype=torch.bfloat16,
+    safety_checker=None
 ).to(device)
+video_pipe.scheduler = scheduler2
+video_pipe.vae.enable_tiling()
+video_pipe.vae.enable_slicing()
+#pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
+video_pipe.enable_model_cpu_offload()
+video_pipe.enable_free_init(method="butterworth", use_fast_sampling=fast)
 # functionality
             generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
         ).images[0]
+    if title != "":
+        draw = ImageDraw.Draw(pipe_out)
+        textheight=84
+        font = ImageFont.truetype(r"OpenSans-Bold.ttf", textheight)
+        textwidth = draw.textlength(title,font)
+        x = (width - textwidth) // 2
+        y = (height - textheight) // 2
+        draw.text((x, y), title,font=font)
     if time == 0.0:
         return img
             pipe.fuse_lora()
             pipe.set_adapters("motion", [0.7])
         last_motion = motion
+    return video_pipe(
         prompt=p1,
         negative_prompt=p2,
+        image=img,
         num_inference_steps=step,
         guidance_scale=accu,
+        num_videos_per_prompt=1,
         num_frames=(fps*time),
         need_safetycheck=False,
         generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
     pipe_out = pipe_generate(*inp)
+    name = generate_random_string(12) + ( ".png" if time == 0 else ".mp4" )
     if inp[4] == 0.0:
         pipe_out.save(name)
     else:
+        export_to_video(pipe_out,name,fps=fps)
     return name
 def ui():
     global result
     with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
         gr.Markdown(f"""
+            # MULTI-LANGUAGE MP4/PNG CREATOR
         """)
         with gr.Row(elem_id="col-container"):
             with gr.Column():
                         maximum=600.0,
                         value=0.0,
                         step=5.0,
+                        label="MP4/PNG Duration (0s = PNG)"
                     )
                 with gr.Row():
                         motion = gr.Dropdown(