Spaces:
Sleeping
Sleeping
commit
Browse files
app.py
CHANGED
@@ -435,8 +435,8 @@ pegasus_name = "google/pegasus-xsum"
|
|
435 |
# precision data
|
436 |
|
437 |
seq=512
|
438 |
-
width=
|
439 |
-
height=
|
440 |
image_steps=8
|
441 |
img_accu=0
|
442 |
|
@@ -511,8 +511,8 @@ pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-
|
|
511 |
@spaces.GPU(duration=180)
|
512 |
def upscaler(
|
513 |
input_image: Image.Image,
|
514 |
-
prompt: str = "
|
515 |
-
negative_prompt: str = "
|
516 |
seed: int = int(str(random.random()).split(".")[1]),
|
517 |
upscale_factor: int = 4,
|
518 |
controlnet_scale: float = 0.6,
|
@@ -521,7 +521,7 @@ def upscaler(
|
|
521 |
tile_width: int = 112,
|
522 |
tile_height: int = 144,
|
523 |
denoise_strength: float = 0.35,
|
524 |
-
num_inference_steps: int =
|
525 |
solver: str = "DDIM",
|
526 |
) -> Image.Image:
|
527 |
|
@@ -554,7 +554,7 @@ def upscaler(
|
|
554 |
|
555 |
@spaces.GPU(duration=180)
|
556 |
def summarize_text(
|
557 |
-
text, max_length=30, num_beams=
|
558 |
):
|
559 |
log(f'CALL summarize_text')
|
560 |
summary = pegasus_tokenizer.decode( pegasus_model.generate(
|
@@ -573,18 +573,43 @@ def generate_random_string(length):
|
|
573 |
@spaces.GPU(duration=180)
|
574 |
def pipe_generate(p1,p2):
|
575 |
log(f'CALL pipe_generate')
|
576 |
-
|
577 |
prompt=p1,
|
578 |
negative_prompt=p2,
|
579 |
height=height,
|
580 |
width=width,
|
581 |
guidance_scale=img_accu,
|
582 |
-
num_images_per_prompt=
|
583 |
num_inference_steps=image_steps,
|
584 |
max_sequence_length=seq,
|
585 |
generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
|
586 |
-
).images
|
587 |
log(f'RET pipe_generate')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
588 |
return img
|
589 |
|
590 |
def handle_generate(artist,song,genre,lyrics):
|
@@ -596,9 +621,9 @@ def handle_generate(artist,song,genre,lyrics):
|
|
596 |
pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
|
597 |
pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
|
598 |
pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
|
599 |
-
pos_lyrics_sum = summarize_text(pos_lyrics)
|
600 |
neg = f"Textual Labeled Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Low-Resolution Painted"
|
601 |
-
pos = f'Realistic Vivid Genuine Reasonable Detailed 4K { pos_genre }
|
602 |
|
603 |
print(f"""
|
604 |
Positive: {pos}
|
@@ -606,34 +631,17 @@ def handle_generate(artist,song,genre,lyrics):
|
|
606 |
Negative: {neg}
|
607 |
""")
|
608 |
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
y = height - math.ceil(textheight * rows / 2)
|
621 |
-
y = y - math.ceil(y / labels_distance)
|
622 |
-
draw.text((x, y), pos_song, (255,255,255), font=font, spacing=2, stroke_width=4, stroke_fill=(0,0,0))
|
623 |
-
|
624 |
-
textheight=min(math.ceil( width / 12 ), math.ceil( height / 6 ))
|
625 |
-
font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
|
626 |
-
textwidth = draw.textlength(pos_artist,font)
|
627 |
-
x = math.ceil((width - textwidth) / 2)
|
628 |
-
y = height - math.ceil(textheight * rows / 2)
|
629 |
-
y = y + math.ceil(y / labels_distance)
|
630 |
-
draw.text((x, y), pos_artist, (0,0,0), font=font, spacing=6, stroke_width=8, stroke_fill=(255,255,255))
|
631 |
-
|
632 |
-
enhanced_img = upscaler(img)
|
633 |
-
|
634 |
-
name = generate_random_string(12) + ".png"
|
635 |
-
enhanced_img.save(name)
|
636 |
-
return name
|
637 |
|
638 |
# entry
|
639 |
|
@@ -643,36 +651,45 @@ if __name__ == "__main__":
|
|
643 |
# Song Cover Image Generator
|
644 |
""")
|
645 |
with gr.Column():
|
646 |
-
with gr.
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
651 |
)
|
652 |
-
|
653 |
-
placeholder="
|
|
|
654 |
container=False,
|
655 |
max_lines=1
|
656 |
)
|
657 |
-
genre = gr.Textbox(
|
658 |
-
placeholder="Genre",
|
659 |
-
container=False,
|
660 |
-
max_lines=1
|
661 |
-
)
|
662 |
-
lyrics = gr.Textbox(
|
663 |
-
placeholder="Lyrics (English)",
|
664 |
-
container=False,
|
665 |
-
max_lines=1
|
666 |
-
)
|
667 |
-
with gr.Column():
|
668 |
-
cover = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
669 |
|
670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
671 |
|
672 |
run.click(
|
673 |
fn=handle_generate,
|
674 |
inputs=[artist,song,genre,lyrics],
|
675 |
-
outputs=[
|
676 |
)
|
677 |
|
678 |
demo.queue().launch()
|
|
|
435 |
# precision data

# Passed as `max_sequence_length` to the image pipeline in pipe_generate.
seq=512
# Canvas size: passed as `width`/`height` to the image pipeline and reused by
# add_song_cover_text to size and position the labels.
width=768
height=768
# Passed as `num_inference_steps` to the image pipeline in pipe_generate.
image_steps=8
# Passed as `guidance_scale` to the image pipeline in pipe_generate.
img_accu=0
|
442 |
|
|
|
511 |
@spaces.GPU(duration=180)
|
512 |
def upscaler(
|
513 |
input_image: Image.Image,
|
514 |
+
prompt: str = "Best-Quality Realistic Genuine Reasonable Highly-Detailed",
|
515 |
+
negative_prompt: str = "Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Normal-Quality Low-Resolution Painted",
|
516 |
seed: int = int(str(random.random()).split(".")[1]),
|
517 |
upscale_factor: int = 4,
|
518 |
controlnet_scale: float = 0.6,
|
|
|
521 |
tile_width: int = 112,
|
522 |
tile_height: int = 144,
|
523 |
denoise_strength: float = 0.35,
|
524 |
+
num_inference_steps: int = 30,
|
525 |
solver: str = "DDIM",
|
526 |
) -> Image.Image:
|
527 |
|
|
|
554 |
|
555 |
@spaces.GPU(duration=180)
|
556 |
def summarize_text(
|
557 |
+
text, max_length=30, num_beams=4, early_stopping=True
|
558 |
):
|
559 |
log(f'CALL summarize_text')
|
560 |
summary = pegasus_tokenizer.decode( pegasus_model.generate(
|
|
|
573 |
@spaces.GPU(duration=180)
def pipe_generate(p1,p2):
    """Run the image pipeline once and return the images it produced.

    Args:
        p1: Positive prompt text.
        p2: Negative prompt text.

    Returns:
        The ``.images`` attribute of the pipeline result; six images are
        requested per call.
    """
    log(f'CALL pipe_generate')
    # Draw the seed directly as an integer. The earlier approach parsed the
    # digits after the "." in str(random.random()), which fails whenever the
    # float prints in scientific notation (values below 1e-4, e.g. "5.2e-05"
    # -> int("2e-05") raises ValueError; "1e-05" has no "." -> IndexError).
    seed = random.randrange(2**32)
    imgs = image_pipe(
        prompt=p1,
        negative_prompt=p2,
        height=height,
        width=width,
        guidance_scale=img_accu,
        num_images_per_prompt=6,
        num_inference_steps=image_steps,
        max_sequence_length=seq,
        generator=torch.Generator(device).manual_seed(seed)
    ).images
    log(f'RET pipe_generate')
    return imgs
|
589 |
+
|
590 |
+
def _draw_cover_label(draw, canvas_size, text, divisors, shift_sign,
                      fill, stroke_fill, spacing, stroke_width):
    """Draw one horizontally centred label, clamped so it stays on the canvas."""
    canvas_w, canvas_h = canvas_size
    rows = 1
    labels_distance = 1 / 2.5

    # Font size is the smaller of the width-based and height-based limits.
    w_div, h_div = divisors
    textheight = min(math.ceil(canvas_w / w_div), math.ceil(canvas_h / h_div))
    font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)

    textwidth = draw.textlength(text, font)
    x = math.ceil((canvas_w - textwidth) / 2)

    # Same base/offset arithmetic as before: start near the bottom edge and
    # shift up (shift_sign=-1) or down (shift_sign=+1) by a fraction of y.
    y = canvas_h - math.ceil(textheight * rows / 2)
    y = y + shift_sign * math.ceil(y * labels_distance)

    # Keep the whole glyph box (plus stroke) inside the image. Without this
    # the downward-shifted label lands below the canvas (y = 1031 on a
    # 768px-high image) and is never visible.
    lowest_y = canvas_h - textheight - stroke_width
    y = max(0, min(y, lowest_y))

    draw.text((x, y), text, fill, font=font, spacing=spacing,
              stroke_width=stroke_width, stroke_fill=stroke_fill)


def add_song_cover_text(img,artist,song):
    """Draw the song title and artist name onto ``img`` and return it.

    The image is modified in place. The song title is drawn in white with a
    black outline; the artist name is drawn in black with a white outline,
    in a slightly smaller font, below the title.

    Args:
        img: Image to draw on (must be accepted by ``ImageDraw.Draw``).
        artist: Artist name text.
        song: Song title text.

    Returns:
        The same ``img`` object, with both labels drawn.
    """
    draw = ImageDraw.Draw(img)
    # Lay out against the actual image rather than the module-level
    # width/height so the labels stay correct if the two ever differ.
    canvas_size = img.size

    _draw_cover_label(draw, canvas_size, song, (10, 5), -1,
                      fill=(255,255,255), stroke_fill=(0,0,0),
                      spacing=2, stroke_width=4)
    _draw_cover_label(draw, canvas_size, artist, (12, 6), +1,
                      fill=(0,0,0), stroke_fill=(255,255,255),
                      spacing=4, stroke_width=2)

    return img
|
614 |
|
615 |
def handle_generate(artist,song,genre,lyrics):
    """Build prompts from the form inputs, generate covers, and save them.

    For every image returned by ``pipe_generate`` the artist/song labels are
    drawn, the image is upscaled, and the result is saved as a PNG.

    Args:
        artist: Artist name from the form.
        song: Song title from the form.
        genre: Genre from the form.
        lyrics: Lyrics from the form; may be empty.

    Returns:
        A tuple of saved PNG file names, one per generated image, in order.
    """
    # NOTE(review): app.py lines 616-620 fall between two diff hunks and are
    # not visible here. They are expected to bind `pos_artist` and `pos_song`
    # before this point -- restore them verbatim when applying this change.
    pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
    pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
    pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()

    # Only summarise when there is something to summarise. The previous
    # expression had value and condition swapped and produced a bool.
    pos_lyrics_sum = summarize_text(pos_lyrics) if pos_lyrics else ""
    neg = f"Textual Labeled Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Low-Resolution Painted"

    # Built outside the f-string: the old inline form was inverted in the
    # same way and used a backslash inside a replacement field, which is a
    # SyntaxError before Python 3.12.
    lyrics_tag = f'"{pos_lyrics_sum}"' if pos_lyrics_sum else "INSTRUMENTAL"
    pos = f'Realistic Vivid Genuine Reasonable Highly-Detailed 4K { pos_genre } SONG { pos_song } { lyrics_tag }'

    # NOTE(review): app.py line 630 (between "Positive" and "Negative") is not
    # shown in the diff and the literal's inner whitespace is not recoverable
    # from it; a blank line is assumed -- confirm against the real file.
    print(f"""
    Positive: {pos}

    Negative: {neg}
    """)

    imgs = pipe_generate(pos,neg)

    names = []
    # enumerate() gives each image its own index; the old counter was never
    # incremented, so every image was saved over the same file name.
    for index, img in enumerate(imgs, start=1):
        # add_song_cover_text requires the artist and song text as well.
        labeled_img = add_song_cover_text(img, pos_artist, pos_song)
        enhanced_img = upscaler(labeled_img)
        name = f'{pos_artist} - {pos_song} ({index}).png'
        enhanced_img.save(name)
        names.append(name)

    # `return *names` is a SyntaxError; return one value per output component.
    return tuple(names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
|
646 |
# entry
|
647 |
|
|
|
651 |
# Song Cover Image Generator
|
652 |
""")
|
653 |
with gr.Column():
|
654 |
+
with gr.Column():
|
655 |
+
with gr.Row():
|
656 |
+
artist = gr.Textbox(
|
657 |
+
placeholder="Artist name",
|
658 |
+
value="",
|
659 |
+
container=False,
|
660 |
+
max_lines=1
|
661 |
+
)
|
662 |
+
song = gr.Textbox(
|
663 |
+
placeholder="Song name",
|
664 |
+
value="",
|
665 |
+
container=False,
|
666 |
+
max_lines=1
|
667 |
+
)
|
668 |
+
genre = gr.Textbox(
|
669 |
+
placeholder="Genre",
|
670 |
+
value="",
|
671 |
+
container=False,
|
672 |
+
max_lines=1
|
673 |
)
|
674 |
+
lyrics = gr.Textbox(
|
675 |
+
placeholder="Lyrics (English)",
|
676 |
+
value="",
|
677 |
container=False,
|
678 |
max_lines=1
|
679 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
680 |
|
681 |
+
run = gr.Button("Generate",elem_classes="btn")
|
682 |
+
cover1 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
683 |
+
cover2 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
684 |
+
cover3 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
685 |
+
cover4 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
686 |
+
cover5 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
687 |
+
cover6 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
688 |
|
689 |
run.click(
|
690 |
fn=handle_generate,
|
691 |
inputs=[artist,song,genre,lyrics],
|
692 |
+
outputs=[cover1,cover2,cover3,cover4,cover5,cover6]
|
693 |
)
|
694 |
|
695 |
demo.queue().launch()
|