Kokoro-API-5

Running

App Files Files Community

yaron123 committed on Jan 19

Commit

f3a00bf

1 Parent(s): 7e44dd0

commit

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +68 -124

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Photo Motion
 emoji: 💡
 colorFrom: yellow
 colorTo: gray

 ---
+title: Song Cover Image Generator
 emoji: 💡
 colorFrom: yellow
 colorTo: gray

app.py CHANGED Viewed

@@ -50,18 +50,15 @@ base = "black-forest-labs/FLUX.1-schnell"
 # variable data
-additional_image = None
 # precision data
 seq=512
-fps=15
-width=768
-height=512
 image_steps=8
-video_steps=50
-img_accu=9
-vid_accu=7
 # ui data
@@ -124,24 +121,8 @@ function custom(){
 # torch pipes
-def disabled_safety_checker(images, clip_input):
-    if len(images.shape)==4:
-        num_images = images.shape[0]
-        return images, [False]*num_images
-    else:
-        return images, False
 image_pipe = FluxPipeline.from_pretrained(base, torch_dtype=torch.bfloat16).to(device)
 image_pipe.enable_model_cpu_offload()
-image_pipe.safety_checker = None
-video_pipe = CogVideoXImageToVideoPipeline.from_pretrained(
-    "THUDM/CogVideoX-5b-I2V",
-    torch_dtype=torch.bfloat16
-).to(device)
-video_pipe.vae.enable_tiling()
-video_pipe.vae.enable_slicing()
-video_pipe.safety_checker = None
 # functionality
@@ -149,12 +130,9 @@ def generate_random_string(length):
     characters = str(ascii_letters + digits)
     return ''.join(random.choice(characters) for _ in range(length))
-@spaces.GPU(duration=80)
-def pipe_generate(img,p1,p2,time,title):
-    global pipe
-    if img is None:
-        img = image_pipe(
             prompt=p1,
             negative_prompt=p2,
             height=height,
@@ -164,133 +142,99 @@ def pipe_generate(img,p1,p2,time,title):
             num_inference_steps=image_steps,
             max_sequence_length=seq,
             generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
-        ).images[0]
-        additional_image = True
-    if title != "":
-        draw = ImageDraw.Draw(img)
-        textheight=min(( width // 10 ), ( height // 5 ))
-        rows = 1
-        font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
-        textwidth = draw.textlength(title,font)
-        x = (width - textwidth) // 2
-        y = (height - (textheight * rows // 2)) // 2
-        draw.text((x, y), title, (255,255,255), font=font)
-    additional_image = img if additional_image else None
-    if time == 0.0:
-        return img
-    return video_pipe(
-        prompt=p1,
-        negative_prompt=p2.replace("textual content, ",""),
-        image=img,
-        num_inference_steps=video_steps,
-        guidance_scale=vid_accu,
-        num_videos_per_prompt=1,
-        num_frames=(fps*time),
-        generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
-    ).frames[0]
-def handle_generate(*_inp):
-    additional_image = None
-    inp = list(_inp)
-    #inp[1] = translate(inp[1],"english")
-    #inp[2] = translate(inp[2],"english")
-    if len(inp[2]) >= 2:
-        inp[2] = "," + inp[2].strip(",").strip(" ")
-    inp[2] = f"textual,labeled,divined,distorted,discontinuous,ugly,blurry,low resolution,motionless,static,wrong body anatomy{inp[2]}"
-    if len(inp[1]) >= 2:
-        inp[1] = "," + inp[1].strip(",").strip(" ")
-    inp[1] = f'realistic,natural,genuine,reasonable,highly detailed{inp[1]}'
     print(f"""
         Positive: {inp[1]}
         Negative: {inp[2]}
     """)
-    pipe_out = pipe_generate(*inp)
-    name = generate_random_string(12) + ( ".png" if inp[3] == 0.0 else ".mp4" )
-    if inp[3] == 0.0:
-        pipe_out.save(name)
-    else:
-        export_to_video(pipe_out,name,fps=fps)
-    if inp[3] == 0.0:
-        return name, None
-    else:
-        return additional_image, name
 def ui():
-    global result
     with gr.Blocks(theme=gr.themes.Citrus(),css=css,js=js) as demo:
         gr.Markdown(f"""
-            # Photo Motion - PNG/MP4 Generator
         """)
         with gr.Row():
-            title = gr.Textbox(
-                placeholder="Logo title",
-                container=False,
-                max_lines=1
-            )
-            prompt = gr.Textbox(
-                elem_id="prompt",
-                placeholder="Included keywords",
-                container=False,
-                max_lines=1
-            )
         with gr.Row():
-            prompt2 = gr.Textbox(
-                elem_id="prompt2",
-                placeholder="Excluded keywords",
                 container=False,
                 max_lines=1
             )
         with gr.Row():
-            time = gr.Slider(
-                minimum=0.0,
-                maximum=3.0,
-                value=0.0,
-                step=1.0,
-                label="Duration (0s = PNG)"
-            )
-        with gr.Row(elem_id="col-container"):
-            with gr.Column():
-                img = gr.Image(label="Upload photo",show_label=True,container=False,type="pil")
-            with gr.Column():
-                res_img = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
-            with gr.Column():
-                res_vid = gr.Video(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, show_share_button=False)
         with gr.Row():
-            run_button = gr.Button("Start!",elem_classes="btn",scale=0)
         gr.on(
             triggers=[
-                run_button.click,
-                prompt.submit,
-                prompt2.submit
             ],
             fn=handle_generate,
-            inputs=[img,prompt,prompt2,time,title],
-            outputs=[res_img,res_vid]
         )
         demo.queue().launch()
 # entry
 if __name__ == "__main__":
-    os.chdir(os.path.abspath(os.path.dirname(__file__)))
     ui()
-# end

 # variable data
 # precision data
 seq=512
+width=4320
+height=4320
 image_steps=8
+img_accu=0
 # ui data
 # torch pipes
 image_pipe = FluxPipeline.from_pretrained(base, torch_dtype=torch.bfloat16).to(device)
 image_pipe.enable_model_cpu_offload()
 # functionality
     characters = str(ascii_letters + digits)
     return ''.join(random.choice(characters) for _ in range(length))
+@spaces.GPU()
+def pipe_generate(p1,p2):
+    return image_pipe(
             prompt=p1,
             negative_prompt=p2,
             height=height,
             num_inference_steps=image_steps,
             max_sequence_length=seq,
             generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
+    ).images[0]
def _clean_spaces(text):
    """Collapse each run of spaces, tabs and newlines into one space and trim both ends."""
    return re.sub(r"[ \t\n]+", " ", text).strip()


def _drop_punctuation(text):
    """Remove every character listed in ``string.punctuation`` from *text*."""
    # str.translate avoids building a regex character class out of the raw
    # punctuation string, where the ``\]`` pair was read as an escaped bracket
    # and the backslash itself was therefore never removed.
    return text.translate(str.maketrans("", "", punctuation))


def _draw_centered_label(draw, text, font_size, canvas_size, direction):
    """Draw *text* in white, horizontally centred, above or below the middle.

    direction is -1 to shift the label up from the vertical centre and +1 to
    shift it down; the shift is one third of the centre offset.
    """
    canvas_width, canvas_height = canvas_size
    font = ImageFont.truetype(r"Alef-Bold.ttf", font_size)
    x = (canvas_width - draw.textlength(text, font)) // 2
    centre_y = (canvas_height - font_size // 2) // 2
    # NOTE(review): the previous code divided by ``1 // 3`` (which is 0) and
    # crashed; a one-third shift is the presumed intent -- confirm the layout.
    y = centre_y + direction * (centre_y // 3)
    draw.text((x, y), text, (255, 255, 255), font=font)


def handle_generate(artist, song, genre, lyrics):
    """Generate a song cover image and return the path of the saved PNG.

    The four text inputs are normalised (whitespace collapsed; punctuation
    removed from genre and lyrics; each word of the song title gets an
    upper-cased first letter), combined into a positive prompt and passed to
    ``pipe_generate`` together with a fixed negative prompt. The song title
    and the artist name are then drawn onto the returned image, which is
    saved under a random 12-character name.

    Args:
        artist: Artist name, drawn below the centre of the image.
        song: Song title, drawn above the centre and used in the prompt.
        genre: Genre, upper-cased and used in the prompt.
        lyrics: Lyrics, lower-cased and quoted in the prompt.

    Returns:
        The file name of the PNG written to the current directory.
    """
    pos_artist = _clean_spaces(artist)
    pos_song = ' '.join(
        word[0].upper() + word[1:] for word in _clean_spaces(song).split()
    )
    pos_genre = _clean_spaces(_drop_punctuation(genre)).upper()
    # The lyrics text must come from ``lyrics``; it was previously derived
    # from ``genre`` by mistake, so the lyrics never reached the prompt.
    pos_lyrics = _clean_spaces(_drop_punctuation(lyrics)).lower()

    neg = "Textual Labeled Distorted Discontinuous Ugly Blurry"
    pos = f'Realistic Natural Genuine Reasonable Detailed { pos_genre } GENRE SONG COVER FOR { pos_song }: "{ pos_lyrics }"'
    # Log the prompts actually sent to the pipeline (the old ``inp`` list no
    # longer exists in this function).
    print(f"""
        Positive: {pos}
        Negative: {neg}
    """)

    img = pipe_generate(pos, neg)
    draw = ImageDraw.Draw(img)
    # Measure against the image that was really produced so the labels stay
    # centred even if the pipeline output differs from the configured size.
    canvas_width, canvas_height = img.size

    song_size = min(canvas_width // 10, canvas_height // 5)
    _draw_centered_label(draw, pos_song, song_size, img.size, -1)

    artist_size = min(canvas_width // 12, canvas_height // 6)
    _draw_centered_label(draw, pos_artist, artist_size, img.size, +1)

    name = generate_random_string(12) + ".png"
    img.save(name)
    return name
 def ui():
     with gr.Blocks(theme=gr.themes.Citrus(),css=css,js=js) as demo:
         gr.Markdown(f"""
+            # Song Cover Image Generator
         """)
         with gr.Row():
+            with gr.Column():
+                artist = gr.Textbox(
+                    placeholder="Artist name",
+                    container=False,
+                    max_lines=1
+                )
+            with gr.Column():
+                song = gr.Textbox(
+                    placeholder="Song name",
+                    container=False,
+                    max_lines=1
+                )
+            with gr.Column():
+                genre = gr.Textbox(
+                    placeholder="Genre",
+                    container=False,
+                    max_lines=1
+                )
         with gr.Row():
+            lyrics = gr.Textbox(
+                placeholder="Lyrics (English)",
                 container=False,
                 max_lines=1
             )
         with gr.Row():
+            cover = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
         with gr.Row():
+            run = gr.Button("Generate",elem_classes="btn")
         gr.on(
             triggers=[
+                run.click
             ],
             fn=handle_generate,
+            inputs=[artist,song,genre,lyrics],
+            outputs=[cover]
         )
         demo.queue().launch()
 # entry
 if __name__ == "__main__":
     ui()
+# end