Spaces:
Sleeping
Sleeping
commit
Browse files
app.py
CHANGED
|
@@ -435,8 +435,8 @@ pegasus_name = "google/pegasus-xsum"
|
|
| 435 |
# precision data
|
| 436 |
|
| 437 |
seq=512
|
| 438 |
-
width=
|
| 439 |
-
height=
|
| 440 |
image_steps=8
|
| 441 |
img_accu=0
|
| 442 |
|
|
@@ -511,8 +511,8 @@ pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-
|
|
| 511 |
@spaces.GPU(duration=180)
|
| 512 |
def upscaler(
|
| 513 |
input_image: Image.Image,
|
| 514 |
-
prompt: str = "
|
| 515 |
-
negative_prompt: str = "
|
| 516 |
seed: int = int(str(random.random()).split(".")[1]),
|
| 517 |
upscale_factor: int = 4,
|
| 518 |
controlnet_scale: float = 0.6,
|
|
@@ -521,7 +521,7 @@ def upscaler(
|
|
| 521 |
tile_width: int = 112,
|
| 522 |
tile_height: int = 144,
|
| 523 |
denoise_strength: float = 0.35,
|
| 524 |
-
num_inference_steps: int =
|
| 525 |
solver: str = "DDIM",
|
| 526 |
) -> Image.Image:
|
| 527 |
|
|
@@ -554,7 +554,7 @@ def upscaler(
|
|
| 554 |
|
| 555 |
@spaces.GPU(duration=180)
|
| 556 |
def summarize_text(
|
| 557 |
-
text, max_length=30, num_beams=
|
| 558 |
):
|
| 559 |
log(f'CALL summarize_text')
|
| 560 |
summary = pegasus_tokenizer.decode( pegasus_model.generate(
|
|
@@ -573,18 +573,43 @@ def generate_random_string(length):
|
|
| 573 |
@spaces.GPU(duration=180)
|
| 574 |
def pipe_generate(p1,p2):
|
| 575 |
log(f'CALL pipe_generate')
|
| 576 |
-
|
| 577 |
prompt=p1,
|
| 578 |
negative_prompt=p2,
|
| 579 |
height=height,
|
| 580 |
width=width,
|
| 581 |
guidance_scale=img_accu,
|
| 582 |
-
num_images_per_prompt=
|
| 583 |
num_inference_steps=image_steps,
|
| 584 |
max_sequence_length=seq,
|
| 585 |
generator=torch.Generator(device).manual_seed(int(str(random.random()).split(".")[1]))
|
| 586 |
-
).images
|
| 587 |
log(f'RET pipe_generate')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
return img
|
| 589 |
|
| 590 |
def handle_generate(artist,song,genre,lyrics):
|
|
@@ -596,9 +621,9 @@ def handle_generate(artist,song,genre,lyrics):
|
|
| 596 |
pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
|
| 597 |
pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
|
| 598 |
pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
|
| 599 |
-
pos_lyrics_sum = summarize_text(pos_lyrics)
|
| 600 |
neg = f"Textual Labeled Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Low-Resolution Painted"
|
| 601 |
-
pos = f'Realistic Vivid Genuine Reasonable Detailed 4K { pos_genre }
|
| 602 |
|
| 603 |
print(f"""
|
| 604 |
Positive: {pos}
|
|
@@ -606,34 +631,17 @@ def handle_generate(artist,song,genre,lyrics):
|
|
| 606 |
Negative: {neg}
|
| 607 |
""")
|
| 608 |
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
y = height - math.ceil(textheight * rows / 2)
|
| 621 |
-
y = y - math.ceil(y / labels_distance)
|
| 622 |
-
draw.text((x, y), pos_song, (255,255,255), font=font, spacing=2, stroke_width=4, stroke_fill=(0,0,0))
|
| 623 |
-
|
| 624 |
-
textheight=min(math.ceil( width / 12 ), math.ceil( height / 6 ))
|
| 625 |
-
font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
|
| 626 |
-
textwidth = draw.textlength(pos_artist,font)
|
| 627 |
-
x = math.ceil((width - textwidth) / 2)
|
| 628 |
-
y = height - math.ceil(textheight * rows / 2)
|
| 629 |
-
y = y + math.ceil(y / labels_distance)
|
| 630 |
-
draw.text((x, y), pos_artist, (0,0,0), font=font, spacing=6, stroke_width=8, stroke_fill=(255,255,255))
|
| 631 |
-
|
| 632 |
-
enhanced_img = upscaler(img)
|
| 633 |
-
|
| 634 |
-
name = generate_random_string(12) + ".png"
|
| 635 |
-
enhanced_img.save(name)
|
| 636 |
-
return name
|
| 637 |
|
| 638 |
# entry
|
| 639 |
|
|
@@ -643,36 +651,45 @@ if __name__ == "__main__":
|
|
| 643 |
# Song Cover Image Generator
|
| 644 |
""")
|
| 645 |
with gr.Column():
|
| 646 |
-
with gr.
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
)
|
| 652 |
-
|
| 653 |
-
placeholder="
|
|
|
|
| 654 |
container=False,
|
| 655 |
max_lines=1
|
| 656 |
)
|
| 657 |
-
genre = gr.Textbox(
|
| 658 |
-
placeholder="Genre",
|
| 659 |
-
container=False,
|
| 660 |
-
max_lines=1
|
| 661 |
-
)
|
| 662 |
-
lyrics = gr.Textbox(
|
| 663 |
-
placeholder="Lyrics (English)",
|
| 664 |
-
container=False,
|
| 665 |
-
max_lines=1
|
| 666 |
-
)
|
| 667 |
-
with gr.Column():
|
| 668 |
-
cover = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 669 |
|
| 670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
|
| 672 |
run.click(
|
| 673 |
fn=handle_generate,
|
| 674 |
inputs=[artist,song,genre,lyrics],
|
| 675 |
-
outputs=[
|
| 676 |
)
|
| 677 |
|
| 678 |
demo.queue().launch()
|
|
|
|
| 435 |
# precision data
|
| 436 |
|
| 437 |
seq=512
|
| 438 |
+
width=768
|
| 439 |
+
height=768
|
| 440 |
image_steps=8
|
| 441 |
img_accu=0
|
| 442 |
|
|
|
|
| 511 |
@spaces.GPU(duration=180)
|
| 512 |
def upscaler(
|
| 513 |
input_image: Image.Image,
|
| 514 |
+
prompt: str = "Best-Quality Realistic Genuine Reasonable Highly-Detailed",
|
| 515 |
+
negative_prompt: str = "Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Normal-Quality Low-Resolution Painted",
|
| 516 |
seed: int = int(str(random.random()).split(".")[1]),
|
| 517 |
upscale_factor: int = 4,
|
| 518 |
controlnet_scale: float = 0.6,
|
|
|
|
| 521 |
tile_width: int = 112,
|
| 522 |
tile_height: int = 144,
|
| 523 |
denoise_strength: float = 0.35,
|
| 524 |
+
num_inference_steps: int = 30,
|
| 525 |
solver: str = "DDIM",
|
| 526 |
) -> Image.Image:
|
| 527 |
|
|
|
|
| 554 |
|
| 555 |
@spaces.GPU(duration=180)
|
| 556 |
def summarize_text(
|
| 557 |
+
text, max_length=30, num_beams=4, early_stopping=True
|
| 558 |
):
|
| 559 |
log(f'CALL summarize_text')
|
| 560 |
summary = pegasus_tokenizer.decode( pegasus_model.generate(
|
|
|
|
| 573 |
@spaces.GPU(duration=180)
def pipe_generate(p1,p2):
    """Run the image pipeline once for a positive/negative prompt pair.

    The call uses the module-level settings ``height``, ``width``,
    ``img_accu`` (passed as the guidance scale), ``image_steps`` and ``seq``
    (passed as the maximum sequence length), and requests six images per
    prompt. Every call is seeded with a fresh random integer.

    Args:
        p1: Positive prompt text.
        p2: Negative prompt text.

    Returns:
        The ``images`` attribute of the pipeline result.
    """
    log(f'CALL pipe_generate')

    # Draw the seed as an integer directly. The previous approach parsed the
    # digits after the decimal point of str(random.random()); floats below
    # 1e-4 are printed in exponent form ("5e-05", "1.2e-05"), which made that
    # parsing raise IndexError / ValueError on rare calls.
    # 63 bits keeps the value inside the signed 64-bit range.
    seed = random.getrandbits(63)

    imgs = image_pipe(
        prompt=p1,
        negative_prompt=p2,
        height=height,
        width=width,
        guidance_scale=img_accu,
        num_images_per_prompt=6,
        num_inference_steps=image_steps,
        max_sequence_length=seq,
        generator=torch.Generator(device).manual_seed(seed)
    ).images
    log(f'RET pipe_generate')
    return imgs
|
| 589 |
+
|
| 590 |
+
def _draw_cover_label(draw, canvas_size, text, size_divisors, shift_sign, fill, spacing, stroke_width, stroke_fill):
    """Draw one horizontally centred label, shifted up or down from the vertical centre.

    Args:
        draw: ``ImageDraw`` object bound to the target image.
        canvas_size: ``(width, height)`` of the target image in pixels.
        text: Label text to render.
        size_divisors: ``(width_divisor, height_divisor)``; the font size is
            the smaller of ``width / width_divisor`` and
            ``height / height_divisor`` (rounded up).
        shift_sign: ``-1`` to move the label above the centre line, ``+1`` to
            move it below.
        fill: Text colour.
        spacing: Line spacing forwarded to ``draw.text``.
        stroke_width: Outline width forwarded to ``draw.text``.
        stroke_fill: Outline colour forwarded to ``draw.text``.
    """
    canvas_width, canvas_height = canvas_size
    rows = 1
    labels_distance = 1 / 2.5

    textheight = min(
        math.ceil(canvas_width / size_divisors[0]),
        math.ceil(canvas_height / size_divisors[1]),
    )
    font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
    textwidth = draw.textlength(text, font)

    x = math.ceil((canvas_width - textwidth) / 2)
    # Centre the text block vertically, mirroring the x computation above.
    # The earlier formula (height - ceil(textheight * rows / 2)) anchored near
    # the bottom edge, so the downward-shifted label was drawn below the image.
    y = math.ceil((canvas_height - textheight * rows) / 2)
    y = y + shift_sign * math.ceil(y * labels_distance)

    draw.text(
        (x, y),
        text,
        fill,
        font=font,
        spacing=spacing,
        stroke_width=stroke_width,
        stroke_fill=stroke_fill,
    )


def add_song_cover_text(img,artist,song):
    """Write the song title and artist name onto a cover image, in place.

    The song title is drawn in white with a black outline above the vertical
    centre; the artist name is drawn in black with a white outline below it.
    Both labels are centred horizontally and sized relative to the image.

    Args:
        img: Image to draw on; it is modified in place.
        artist: Artist name to render.
        song: Song title to render.

    Returns:
        The same ``img`` object, now carrying both labels.
    """
    draw = ImageDraw.Draw(img)
    # Size and position from the image actually being drawn on, rather than
    # from the module-level generation settings.
    canvas_size = img.size

    _draw_cover_label(
        draw, canvas_size, song, (10, 5), -1,
        fill=(255,255,255), spacing=2, stroke_width=4, stroke_fill=(0,0,0),
    )
    _draw_cover_label(
        draw, canvas_size, artist, (12, 6), 1,
        fill=(0,0,0), spacing=4, stroke_width=2, stroke_fill=(255,255,255),
    )

    return img
|
| 614 |
|
| 615 |
def handle_generate(artist,song,genre,lyrics):
|
|
|
|
| 621 |
pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
|
| 622 |
pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
|
| 623 |
pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
|
| 624 |
+
pos_lyrics_sum = pos_lyrics == "" if pos_lyrics else summarize_text(pos_lyrics)
|
| 625 |
neg = f"Textual Labeled Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Low-Resolution Painted"
|
| 626 |
+
pos = f'Realistic Vivid Genuine Reasonable Highly-Detailed 4K { pos_genre } SONG { pos_song } { pos_lyrics_sum == "" if "INSTRUMENTAL" else "\"" + pos_lyrics_sum + "\"" }'
|
| 627 |
|
| 628 |
print(f"""
|
| 629 |
Positive: {pos}
|
|
|
|
| 631 |
Negative: {neg}
|
| 632 |
""")
|
| 633 |
|
| 634 |
+
imgs = pipe_generate(pos,neg)
|
| 635 |
+
|
| 636 |
+
names = []
|
| 637 |
+
index = 1
|
| 638 |
+
for img in imgs:
|
| 639 |
+
labeled_img = add_song_cover_text(img)
|
| 640 |
+
enhanced_img = upscaler(labeled_img)
|
| 641 |
+
name = f'{pos_artist} - {pos_song} ({index}).png'
|
| 642 |
+
enhanced_img.save(name)
|
| 643 |
+
names.append(name)
|
| 644 |
+
return *names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
|
| 646 |
# entry
|
| 647 |
|
|
|
|
| 651 |
# Song Cover Image Generator
|
| 652 |
""")
|
| 653 |
with gr.Column():
|
| 654 |
+
with gr.Column():
|
| 655 |
+
with gr.Row():
|
| 656 |
+
artist = gr.Textbox(
|
| 657 |
+
placeholder="Artist name",
|
| 658 |
+
value="",
|
| 659 |
+
container=False,
|
| 660 |
+
max_lines=1
|
| 661 |
+
)
|
| 662 |
+
song = gr.Textbox(
|
| 663 |
+
placeholder="Song name",
|
| 664 |
+
value="",
|
| 665 |
+
container=False,
|
| 666 |
+
max_lines=1
|
| 667 |
+
)
|
| 668 |
+
genre = gr.Textbox(
|
| 669 |
+
placeholder="Genre",
|
| 670 |
+
value="",
|
| 671 |
+
container=False,
|
| 672 |
+
max_lines=1
|
| 673 |
)
|
| 674 |
+
lyrics = gr.Textbox(
|
| 675 |
+
placeholder="Lyrics (English)",
|
| 676 |
+
value="",
|
| 677 |
container=False,
|
| 678 |
max_lines=1
|
| 679 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
|
| 681 |
+
run = gr.Button("Generate",elem_classes="btn")
|
| 682 |
+
cover1 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 683 |
+
cover2 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 684 |
+
cover3 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 685 |
+
cover4 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 686 |
+
cover5 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 687 |
+
cover6 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
|
| 688 |
|
| 689 |
run.click(
|
| 690 |
fn=handle_generate,
|
| 691 |
inputs=[artist,song,genre,lyrics],
|
| 692 |
+
outputs=[cover1,cover2,cover3,cover4,cover5,cover6]
|
| 693 |
)
|
| 694 |
|
| 695 |
demo.queue().launch()
|