Kokoro-API-2

Sleeping

App Files Files Community

Yaron Koresh committed on Jan 15

Commit

b32fdcf

verified ·

1 Parent(s): 7d86ea4

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -53

app.py CHANGED Viewed

@@ -24,31 +24,28 @@ from lxml.html import fromstring
 from diffusers.utils import export_to_gif, load_image
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file, save_file
-from diffusers import FluxPipeline, DiffusionPipeline, AnimateDiffPipeline, MotionAdapter, EulerAncestralDiscreteScheduler, DDIMScheduler, StableDiffusionXLPipeline, UNet2DConditionModel, AutoencoderKL, UNet3DConditionModel
 # logging
 warnings.filterwarnings("ignore")
 root = logging.getLogger()
 root.setLevel(logging.DEBUG)
-handler = logging.StreamHandler(sys.stdout)
 handler.setLevel(logging.DEBUG)
 formatter = logging.Formatter('\n >>> [%(levelname)s] %(asctime)s %(name)s: %(message)s\n')
 handler.setFormatter(formatter)
 root.addHandler(handler)
-handler2 = logging.StreamHandler(sys.stderr)
-handler2.setLevel(logging.DEBUG)
-formatter = logging.Formatter('\n >>> [%(levelname)s] %(asctime)s %(name)s: %(message)s\n')
-handler2.setFormatter(formatter)
-root.addHandler(handler2)
 # constant data
-dtype = torch.float16
 if torch.cuda.is_available():
     device = "cuda"
 else:
     device = "cpu"
 base = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
 adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=dtype, device=device)
@@ -61,11 +58,10 @@ result = []
 seq=512
 fast=True
-fps=20
-time=3
 width=896
 height=896
-step=50
 accu=8.5
 # ui data
@@ -112,7 +108,8 @@ function custom(){
 # torch pipes
-pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter).to(device)
 pipe.scheduler = DDIMScheduler(
     clip_sample=False,
     beta_start=0.00085,
@@ -138,8 +135,8 @@ def xpath_finder(str,pattern):
 def translate(text,lang):
     if text == None or lang == None:
         return ""
-    text = re.sub(f'[{punctuation}]', '', re.sub('[\s+]', ' ', text)).lower().strip()
-    lang = re.sub(f'[{punctuation}]', '', re.sub('[\s+]', ' ', lang)).lower().strip()
     if text == "" or lang == "":
         return ""
     if len(text) > 38:
@@ -151,7 +148,7 @@ def translate(text,lang):
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
     ]
-    padded_chars = re.sub("[(^\-)(\-$)]","",text.replace("","-").replace("- -"," ")).strip()
     query_text = f'Please translate {padded_chars}, into {lang}'
     url = f'https://www.google.com/search?q={query_text}'
     content = str(requests.get(
@@ -167,40 +164,43 @@ def translate(text,lang):
     trgt_text = xpath_finder(content,'//*[@id="tw-target-text"]/*')
     if trgt_lang == lang:
         translated = trgt_text
-    ret = re.sub(f'[{punctuation}]', '', re.sub('[\s+]', ' ', translated)).lower().strip()
-    print(ret)
     return ret
 def generate_random_string(length):
     """Return `length` characters picked at random from ASCII letters and digits."""
     alphabet = ascii_letters + digits
     # One random.choice call per requested character, joined into a single string.
     return ''.join(map(random.choice, [alphabet] * length))
-def pipe_generate(img,p1,p2,motion):
     global last_motion
     global pipe
-    if last_motion != motion:
-        if last_motion != "":
-            pipe.unload_lora_weights()
-        if motion != "":
-            pipe.load_lora_weights(motion, adapter_name="motion")
-            pipe.fuse_lora()
-            pipe.set_adapters("motion", [0.7])
-        last_motion = motion
-    pipe.to(device,dtype=dtype)
-    if img == None:
-        img = pipe(
             prompt=p1,
             height=height,
             width=width,
             guidance_scale=accu,
             num_inference_steps=step,
             max_sequence_length=seq,
             generator=torch.Generator(device).manual_seed(0)
         ).images[0]
     return pipe(
         prompt=p1,
         negative_prompt=p2,
@@ -210,32 +210,54 @@ def pipe_generate(img,p1,p2,motion):
         num_inference_steps=step,
         guidance_scale=accu,
         num_frames=(fps*time)
-    )
-def handle_generate(*inp):
-    inp = list(inp)
     inp[1] = translate(inp[1],"english")
     inp[2] = translate(inp[2],"english")
     if inp[2] != "":
-        inp[2] = f", {inp[2]}"
-    inp[2] = f"(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime), text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck{inp[2]}"
-    _do = ['photographed', 'realistic', 'dynamic poze', 'deep field', 'reasonable', "natural", 'rough', 'best quality', 'focused', "highly detailed"]
     if inp[1] != "":
-        _do.append(f"a new {inp[1]} content in the image")
     inp[1] = ", ".join(_do)
     ln = len(result)
     pipe_out = [pipe_generate(*inp) for i in range(ln)]
     names = []
     for i in pipe_out:
-        name = generate_random_string(12)+".png"
-        export_to_gif(i.frames[0],name,fps=fps)
         names.append( name )
     return names
@@ -243,31 +265,45 @@ def ui():
     global result
     with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
         gr.Markdown(f"""
-            # MULTI-LANGUAGE GIF CREATOR
         """)
         with gr.Row(elem_id="col-container"):
             with gr.Column():
                 with gr.Row():
-                    img = gr.Image(label="UPLOAD PHOTO",show_label=True,container=True,type="pil")
                 with gr.Row():
                     prompt = gr.Textbox(
                         elem_id="prompt",
-                        placeholder="INCLUDE",
                         container=False,
                         max_lines=1
                     )
                 with gr.Row():
                     prompt2 = gr.Textbox(
                         elem_id="prompt2",
-                        placeholder="EXCLUDE",
                         container=False,
                         max_lines=1
                     )
-                with gr.Row(visible=False):
                         motion = gr.Dropdown(
-                            label='CAMERA',
                             show_label=True,
-                            container=True,
                             choices=[
                                 ("(No Effect)", ""),
                                 ("Zoom in", "guoyww/#animatediff-motion-lora-zoom-in"),
@@ -282,11 +318,10 @@ def ui():
                             value="",
                             interactive=True
                         )
-            with gr.Column():
-                with gr.Row():
-                    run_button = gr.Button("START",elem_classes="btn",scale=0)
                 with gr.Row():
-                    result.append(gr.Image(interactive=False,elem_classes="image-container", label="Result", show_label=False, type='filepath', show_share_button=False))
         gr.on(
             triggers=[
@@ -295,7 +330,7 @@ def ui():
                 prompt2.submit
             ],
             fn=handle_generate,
-            inputs=[img,prompt,prompt2,motion],
             outputs=result
         )
         demo.queue().launch()
@@ -306,4 +341,4 @@ if __name__ == "__main__":
     os.chdir(os.path.abspath(os.path.dirname(__file__)))
     ui()
-# end

 from diffusers.utils import export_to_gif, load_image
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file, save_file
+from diffusers import AnimateDiffPipeline, MotionAdapter, DDIMScheduler, StableDiffusionPipeline
 # logging
 warnings.filterwarnings("ignore")
 root = logging.getLogger()
 root.setLevel(logging.DEBUG)
+handler = logging.StreamHandler(sys.stderr)
 handler.setLevel(logging.DEBUG)
 formatter = logging.Formatter('\n >>> [%(levelname)s] %(asctime)s %(name)s: %(message)s\n')
 handler.setFormatter(formatter)
 root.addHandler(handler)
 # constant data
 # Pick the execution device and a matching tensor precision.
 # Half precision is kept for CUDA only: diffusers warns that pipelines loaded
 # with torch.float16 cannot run on the "cpu" device, so the CPU fallback
 # uses float32 instead.
 if torch.cuda.is_available():
     device = "cuda"
     dtype = torch.float16
 else:
     device = "cpu"
     dtype = torch.float32
 base = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
 adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=dtype, device=device)
 seq=512
 fast=True
+fps=18
 width=896
 height=896
+step=30
 accu=8.5
 # ui data
 # torch pipes
+image_pipe = StableDiffusionPipeline.from_pretrained(base, torch_dtype=dtype, safety_checker=None).to(device)
+pipe = AnimateDiffPipeline.from_pretrained(base, torch_dtype=dtype, motion_adapter=adapter).to(device)
 pipe.scheduler = DDIMScheduler(
     clip_sample=False,
     beta_start=0.00085,
 def translate(text,lang):
     if text == None or lang == None:
         return ""
+    text = re.sub(f'[{punctuation}]', '', re.sub('[ ]+', ' ', text)).lower().strip()
+    lang = re.sub(f'[{punctuation}]', '', re.sub('[ ]+', ' ', lang)).lower().strip()
     if text == "" or lang == "":
         return ""
     if len(text) > 38:
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
     ]
+    padded_chars = re.sub("(^-)|(-$)","",text.replace("","-").replace("- -"," ")).strip()
     query_text = f'Please translate {padded_chars}, into {lang}'
     url = f'https://www.google.com/search?q={query_text}'
     content = str(requests.get(
     trgt_text = xpath_finder(content,'//*[@id="tw-target-text"]/*')
     if trgt_lang == lang:
         translated = trgt_text
+    ret = re.sub(f'[{punctuation}]', '', re.sub('[ ]+', ' ', translated)).lower().strip()
     return ret
 def generate_random_string(length):
     """Return `length` characters picked at random from ASCII letters and digits."""
     alphabet = ascii_letters + digits
     # One random.choice call per requested character, joined into a single string.
     return ''.join(map(random.choice, [alphabet] * length))
+def pipe_generate(img,p1,p2,motion,time,title):
     global last_motion
     global pipe
+    if img is None:
+        img = image_pipe(
             prompt=p1,
+            negative_prompt=p2,
             height=height,
             width=width,
             guidance_scale=accu,
+            num_images_per_prompt=1,
             num_inference_steps=step,
             max_sequence_length=seq,
+            need_safetycheck=False,
             generator=torch.Generator(device).manual_seed(0)
         ).images[0]
+    if time == 0.0:
+        return img
+    if last_motion != motion:
+        if last_motion != "":
+            pipe.unload_lora_weights()
+        if motion != "":
+            pipe.load_lora_weights(motion, adapter_name="motion")
+            pipe.fuse_lora()
+            pipe.set_adapters("motion", [0.7])
+        last_motion = motion
     return pipe(
         prompt=p1,
         negative_prompt=p2,
         num_inference_steps=step,
         guidance_scale=accu,
         num_frames=(fps*time)
+    ).frames[0]
+def handle_generate(*_inp):
+    inp = list(_inp)
     inp[1] = translate(inp[1],"english")
     inp[2] = translate(inp[2],"english")
     if inp[2] != "":
+        arr = []
+        for wrd in inp[2].split():
+            arr.append(wrd)
+        inp[2] = ", " + ", ".join(arr)
+    inp[2] = f"creative, fake, error, dreamy, unreal, pixelated, bright, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, weird, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, unspecified, general, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck{inp[2]}"
+    _do = ['photographed', "environment", 'realistic', 'true', 'genuine', 'dynamic poze', 'authentic', 'deep field', 'reasonable', "natural", 'rough', "real world", 'best quality', 'focused', "highly detailed"]
     if inp[1] != "":
+        for wrd in inp[1].split():
+            _do.append(wrd)
     inp[1] = ", ".join(_do)
+    if inp[5] != "":
+        _do.append(f'centered readable large bold written text: {inp[5]}')
+    inp[1] = ", ".join(_do)
+    print(f"""
+        - - - -
+        {inp[1]}
+        {inp[2]}
+        - - - -
+    """)
     ln = len(result)
     pipe_out = [pipe_generate(*inp) for i in range(ln)]
     names = []
     for i in pipe_out:
+        name = generate_random_string(12) + ( ".png" if time == 0 else ".gif" )
+        if inp[4] == 0.0:
+            i.save(name)
+        else:
+            export_to_gif(i,name,fps=fps)
         names.append( name )
     return names
     global result
     with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
         gr.Markdown(f"""
+            # MULTI-LANGUAGE GIF/PNG CREATOR
         """)
         with gr.Row(elem_id="col-container"):
             with gr.Column():
                 with gr.Row():
+                    img = gr.Image(label="Upload photo",show_label=True,container=False,type="pil")
+            with gr.Column(scale=0.66):
                 with gr.Row():
+                    title = gr.Textbox(
+                        placeholder="Logo title",
+                        container=False,
+                        max_lines=1
+                    )
                     prompt = gr.Textbox(
                         elem_id="prompt",
+                        placeholder="Included keywords",
                         container=False,
                         max_lines=1
                     )
                 with gr.Row():
                     prompt2 = gr.Textbox(
                         elem_id="prompt2",
+                        placeholder="Excluded keywords",
                         container=False,
                         max_lines=1
                     )
+                with gr.Row():
+                    time = gr.Slider(
+                        minimum=0.0,
+                        maximum=8.0,
+                        value=0.0,
+                        step=1.0,
+                        label="GIF/PNG Duration (0s = PNG)"
+                    )
+                with gr.Row():
                         motion = gr.Dropdown(
+                            label='GIF camera movement',
                             show_label=True,
+                            container=False,
                             choices=[
                                 ("(No Effect)", ""),
                                 ("Zoom in", "guoyww/#animatediff-motion-lora-zoom-in"),
                             value="",
                             interactive=True
                         )
                 with gr.Row():
+                    result.append(gr.Image(interactive=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False))
+        with gr.Row():
+            run_button = gr.Button("Start!",elem_classes="btn",scale=0)
         gr.on(
             triggers=[
                 prompt2.submit
             ],
             fn=handle_generate,
+            inputs=[img,prompt,prompt2,motion,time,title],
             outputs=result
         )
         demo.queue().launch()
     os.chdir(os.path.abspath(os.path.dirname(__file__)))
     ui()
+# end