yaron123 committed on
Commit
446a991
·
1 Parent(s): 8af55c9
Files changed (1) hide show
  1. app.py +77 -60
app.py CHANGED
@@ -435,8 +435,8 @@ pegasus_name = "google/pegasus-xsum"
435
  # precision data
436
 
437
  seq=512
438
- width=1024
439
- height=1024
440
  image_steps=8
441
  img_accu=0
442
 
@@ -511,8 +511,8 @@ pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-
511
  @spaces.GPU(duration=180)
512
  def upscaler(
513
  input_image: Image.Image,
514
- prompt: str = "masterpiece, best quality, highres",
515
- negative_prompt: str = "worst quality, low quality, normal quality",
516
  seed: int = int(str(random.random()).split(".")[1]),
517
  upscale_factor: int = 4,
518
  controlnet_scale: float = 0.6,
@@ -521,7 +521,7 @@ def upscaler(
521
  tile_width: int = 112,
522
  tile_height: int = 144,
523
  denoise_strength: float = 0.35,
524
- num_inference_steps: int = 18,
525
  solver: str = "DDIM",
526
  ) -> Image.Image:
527
 
@@ -554,7 +554,7 @@ def upscaler(
554
 
555
  @spaces.GPU(duration=180)
556
  def summarize_text(
557
- text, max_length=30, num_beams=16, early_stopping=True
558
  ):
559
  log(f'CALL summarize_text')
560
  summary = pegasus_tokenizer.decode( pegasus_model.generate(
@@ -573,18 +573,43 @@ def generate_random_string(length):
573
@spaces.GPU(duration=180)
def pipe_generate(p1,p2):
    """Run ``image_pipe`` once and return the first image it produced.

    Args:
        p1: Forwarded to the pipeline as ``prompt``.
        p2: Forwarded to the pipeline as ``negative_prompt``.

    Returns:
        ``image_pipe(...).images[0]``.
    """
    log(f'CALL pipe_generate')
    # Draw the seed directly as an integer. Parsing the digits after the "."
    # of ``str(random.random())`` raises IndexError/ValueError whenever the
    # float prints in scientific notation (e.g. ``1e-05`` or ``5.5e-05``).
    seed = random.getrandbits(63)
    img = image_pipe(
        prompt=p1,
        negative_prompt=p2,
        height=height,
        width=width,
        guidance_scale=img_accu,
        num_images_per_prompt=1,
        num_inference_steps=image_steps,
        max_sequence_length=seq,
        generator=torch.Generator(device).manual_seed(seed)
    ).images[0]
    log(f'RET pipe_generate')
    return img
589
 
590
  def handle_generate(artist,song,genre,lyrics):
@@ -596,9 +621,9 @@ def handle_generate(artist,song,genre,lyrics):
596
  pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
597
  pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
598
  pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
599
- pos_lyrics_sum = summarize_text(pos_lyrics)
600
  neg = f"Textual Labeled Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Low-Resolution Painted"
601
- pos = f'Realistic Vivid Genuine Reasonable Detailed 4K { pos_genre } GENRE { pos_song }: "{ pos_lyrics_sum }"'
602
 
603
  print(f"""
604
  Positive: {pos}
@@ -606,34 +631,17 @@ def handle_generate(artist,song,genre,lyrics):
606
  Negative: {neg}
607
  """)
608
 
609
- img = pipe_generate(pos,neg)
610
-
611
- draw = ImageDraw.Draw(img)
612
-
613
- rows = 1
614
- labels_distance = math.ceil(1 / 3)
615
-
616
- textheight=min(math.ceil( width / 10 ), math.ceil( height / 5 ))
617
- font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
618
- textwidth = draw.textlength(pos_song,font)
619
- x = math.ceil((width - textwidth) / 2)
620
- y = height - math.ceil(textheight * rows / 2)
621
- y = y - math.ceil(y / labels_distance)
622
- draw.text((x, y), pos_song, (255,255,255), font=font, spacing=2, stroke_width=4, stroke_fill=(0,0,0))
623
-
624
- textheight=min(math.ceil( width / 12 ), math.ceil( height / 6 ))
625
- font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
626
- textwidth = draw.textlength(pos_artist,font)
627
- x = math.ceil((width - textwidth) / 2)
628
- y = height - math.ceil(textheight * rows / 2)
629
- y = y + math.ceil(y / labels_distance)
630
- draw.text((x, y), pos_artist, (0,0,0), font=font, spacing=6, stroke_width=8, stroke_fill=(255,255,255))
631
-
632
- enhanced_img = upscaler(img)
633
-
634
- name = generate_random_string(12) + ".png"
635
- enhanced_img.save(name)
636
- return name
637
 
638
  # entry
639
 
@@ -643,36 +651,45 @@ if __name__ == "__main__":
643
  # Song Cover Image Generator
644
  """)
645
  with gr.Column():
646
- with gr.Row():
647
- artist = gr.Textbox(
648
- placeholder="Artist name",
649
- container=False,
650
- max_lines=1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  )
652
- song = gr.Textbox(
653
- placeholder="Song name",
 
654
  container=False,
655
  max_lines=1
656
  )
657
- genre = gr.Textbox(
658
- placeholder="Genre",
659
- container=False,
660
- max_lines=1
661
- )
662
- lyrics = gr.Textbox(
663
- placeholder="Lyrics (English)",
664
- container=False,
665
- max_lines=1
666
- )
667
- with gr.Column():
668
- cover = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
669
 
670
- run = gr.Button("Generate",elem_classes="btn")
 
 
 
 
 
 
671
 
672
  run.click(
673
  fn=handle_generate,
674
  inputs=[artist,song,genre,lyrics],
675
- outputs=[cover]
676
  )
677
 
678
  demo.queue().launch()
 
435
  # precision data
436
 
437
  seq=512
438
+ width=768
439
+ height=768
440
  image_steps=8
441
  img_accu=0
442
 
 
511
  @spaces.GPU(duration=180)
512
  def upscaler(
513
  input_image: Image.Image,
514
+ prompt: str = "Best-Quality Realistic Genuine Reasonable Highly-Detailed",
515
+ negative_prompt: str = "Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Normal-Quality Low-Resolution Painted",
516
  seed: int = int(str(random.random()).split(".")[1]),
517
  upscale_factor: int = 4,
518
  controlnet_scale: float = 0.6,
 
521
  tile_width: int = 112,
522
  tile_height: int = 144,
523
  denoise_strength: float = 0.35,
524
+ num_inference_steps: int = 30,
525
  solver: str = "DDIM",
526
  ) -> Image.Image:
527
 
 
554
 
555
  @spaces.GPU(duration=180)
556
  def summarize_text(
557
+ text, max_length=30, num_beams=4, early_stopping=True
558
  ):
559
  log(f'CALL summarize_text')
560
  summary = pegasus_tokenizer.decode( pegasus_model.generate(
 
573
@spaces.GPU(duration=180)
def pipe_generate(p1,p2):
    """Run ``image_pipe`` once and return every image it produced.

    Args:
        p1: Forwarded to the pipeline as ``prompt``.
        p2: Forwarded to the pipeline as ``negative_prompt``.

    Returns:
        The ``.images`` attribute of the pipeline result; six images are
        requested per call through ``num_images_per_prompt``.
    """
    log(f'CALL pipe_generate')
    # Draw the seed directly as an integer. Parsing the digits after the "."
    # of ``str(random.random())`` raises IndexError/ValueError whenever the
    # float prints in scientific notation (e.g. ``1e-05`` or ``5.5e-05``).
    seed = random.getrandbits(63)
    imgs = image_pipe(
        prompt=p1,
        negative_prompt=p2,
        height=height,
        width=width,
        guidance_scale=img_accu,
        num_images_per_prompt=6,
        num_inference_steps=image_steps,
        max_sequence_length=seq,
        generator=torch.Generator(device).manual_seed(seed)
    ).images
    log(f'RET pipe_generate')
    return imgs
589
+
590
def add_song_cover_text(img,artist,song):
    """Draw the song title and the artist name onto ``img`` and return it.

    Both labels are centred horizontally using the module-level ``width`` and
    sized from ``width``/``height``; both use the ``Alef-Bold.ttf`` font. The
    song title is white with a black outline, the artist name is black with a
    white outline.

    Args:
        img: Image to draw on; it is modified in place through
            ``ImageDraw.Draw``.
        artist: Text of the artist label.
        song: Text of the song-title label.

    Returns:
        The same ``img`` object that was passed in.
    """
    draw = ImageDraw.Draw(img)

    rows = 1
    labels_distance = 1 / 2.5

    # --- song title: shifted upwards from the bottom reference line ---
    textheight = min(math.ceil(width / 10), math.ceil(height / 5))
    font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
    textwidth = draw.textlength(song, font)
    x = math.ceil((width - textwidth) / 2)
    y = height - math.ceil(textheight * rows / 2)
    y = y - math.ceil(y * labels_distance)
    draw.text((x, y), song, (255,255,255), font=font, spacing=2, stroke_width=4, stroke_fill=(0,0,0))

    # --- artist name: shifted downwards from the bottom reference line ---
    artist_stroke = 2
    textheight = min(math.ceil(width / 12), math.ceil(height / 6))
    font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
    textwidth = draw.textlength(artist, font)
    x = math.ceil((width - textwidth) / 2)
    y = height - math.ceil(textheight * rows / 2)
    y = y + math.ceil(y * labels_distance)
    # The downward shift places y below the bottom edge (y > height), so the
    # label would never be visible. Pull it back so the whole line, including
    # its outline, stays inside the canvas.
    # NOTE(review): the intended vertical layout is not documented; clamping
    # is the smallest change that makes the label visible -- confirm.
    y = min(y, height - textheight * rows - artist_stroke)
    draw.text((x, y), artist, (0,0,0), font=font, spacing=4, stroke_width=artist_stroke, stroke_fill=(255,255,255))

    return img
614
 
615
  def handle_generate(artist,song,genre,lyrics):
 
621
  pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
622
  pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
623
  pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
624
+ pos_lyrics_sum = pos_lyrics == "" if pos_lyrics else summarize_text(pos_lyrics)
625
  neg = f"Textual Labeled Distorted Discontinuous Ugly Blurry Low-Quality Worst-Quality Low-Resolution Painted"
626
+ pos = f'Realistic Vivid Genuine Reasonable Highly-Detailed 4K { pos_genre } SONG { pos_song } { pos_lyrics_sum == "" if "INSTRUMENTAL" else "\"" + pos_lyrics_sum + "\"" }'
627
 
628
  print(f"""
629
  Positive: {pos}
 
631
  Negative: {neg}
632
  """)
633
 
634
+ imgs = pipe_generate(pos,neg)
635
+
636
+ names = []
637
+ index = 1
638
+ for img in imgs:
639
+ labeled_img = add_song_cover_text(img)
640
+ enhanced_img = upscaler(labeled_img)
641
+ name = f'{pos_artist} - {pos_song} ({index}).png'
642
+ enhanced_img.save(name)
643
+ names.append(name)
644
+ return *names
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
  # entry
647
 
 
651
  # Song Cover Image Generator
652
  """)
653
  with gr.Column():
654
+ with gr.Column():
655
+ with gr.Row():
656
+ artist = gr.Textbox(
657
+ placeholder="Artist name",
658
+ value="",
659
+ container=False,
660
+ max_lines=1
661
+ )
662
+ song = gr.Textbox(
663
+ placeholder="Song name",
664
+ value="",
665
+ container=False,
666
+ max_lines=1
667
+ )
668
+ genre = gr.Textbox(
669
+ placeholder="Genre",
670
+ value="",
671
+ container=False,
672
+ max_lines=1
673
  )
674
+ lyrics = gr.Textbox(
675
+ placeholder="Lyrics (English)",
676
+ value="",
677
  container=False,
678
  max_lines=1
679
  )
 
 
 
 
 
 
 
 
 
 
 
 
680
 
681
+ run = gr.Button("Generate",elem_classes="btn")
682
+ cover1 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
683
+ cover2 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
684
+ cover3 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
685
+ cover4 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
686
+ cover5 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
687
+ cover6 = gr.Image(interactive=False,container=False,elem_classes="image-container", label="Result", show_label=True, type='filepath', show_share_button=False)
688
 
689
  run.click(
690
  fn=handle_generate,
691
  inputs=[artist,song,genre,lyrics],
692
+ outputs=[cover1,cover2,cover3,cover4,cover5,cover6]
693
  )
694
 
695
  demo.queue().launch()