yaron123 committed on
Commit
bf621da
·
1 Parent(s): d4b23f6
Files changed (1) hide show
  1. app.py +36 -24
app.py CHANGED
@@ -31,7 +31,7 @@ from huggingface_hub import hf_hub_download
31
  from safetensors.torch import load_file, save_file
32
  from diffusers import FluxPipeline
33
  from PIL import Image, ImageDraw, ImageFont
34
- from transformers import PegasusForConditionalGeneration, PegasusTokenizerFast
35
  from refiners.fluxion.utils import manual_seed
36
  from refiners.foundationals.latent_diffusion import Solver, solvers
37
  from refiners.foundationals.latent_diffusion.stable_diffusion_1.multi_upscaler import (
@@ -434,8 +434,8 @@ pegasus_name = "google/pegasus-xsum"
434
  # precision data
435
 
436
  seq=512
437
- width=720
438
- height=720
439
  image_steps=8
440
  img_accu=0
441
 
@@ -502,8 +502,8 @@ image_pipe.enable_model_cpu_offload()
502
  image_pipe.enable_vae_slicing()
503
  image_pipe.enable_vae_tiling()
504
 
505
- pegasus_tokenizer = PegasusTokenizerFast.from_pretrained("google/pegasus-xsum")
506
- pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
507
 
508
  # functionality
509
 
@@ -512,14 +512,14 @@ def upscaler(
512
  prompt: str = "Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural.",
513
  negative_prompt: str = "Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects.",
514
  seed: int = int(str(random.random()).split(".")[1]),
515
- upscale_factor: int = 4,
516
  controlnet_scale: float = 0.6,
517
  controlnet_decay: float = 1.0,
518
  condition_scale: int = 6,
519
  tile_width: int = 112,
520
  tile_height: int = 144,
521
  denoise_strength: float = 0.35,
522
- num_inference_steps: int = 50,
523
  solver: str = "DDIM",
524
  ) -> Image.Image:
525
 
@@ -542,7 +542,7 @@ def upscaler(
542
  tile_size=(tile_height, tile_width),
543
  denoise_strength=denoise_strength,
544
  num_inference_steps=num_inference_steps,
545
- loras_scale={"more_details": 0.5, "sdxl_render": 1.0},
546
  solver_type=solver_type,
547
  )
548
 
@@ -557,6 +557,9 @@ def summarize_text(
557
  summary = pegasus_tokenizer.decode( pegasus_model.generate(
558
  pegasus_tokenizer(text,return_tensors="pt").input_ids,
559
  max_length=max_length,
 
 
 
560
  early_stopping=True
561
  )[0], skip_special_tokens=True)
562
  log(f'RET summarize_text with summary as {summary}')
@@ -607,33 +610,33 @@ def add_song_cover_text(img,artist,song,height,width):
607
 
608
  return img
609
 
610
- @spaces.GPU(duration=180)
611
  def all_pipes(pos,neg,artist,song):
 
612
  imgs = pipe_generate_image(pos,neg)
613
 
614
- names = []
615
  index = 1
616
- for img in imgs:
617
- enhanced_img = upscaler(img)
618
- labeled_img = add_song_cover_text(enhanced_img,artist,song,height*4,width*4)
619
- name = f'{artist} - {song} ({index}).png'
620
- labeled_img.save(name)
621
- names.append(name)
622
- return names
623
 
624
- def handle_generate(artist,song,genre,lyrics):
 
 
625
 
626
  log(f'CALL handle_generate')
627
 
628
- pos_artist = re.sub("([ \t\n]){1,}", " ", artist).strip()
629
- pos_song = re.sub("([ \t\n]){1,}", " ", song).strip()
630
  pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
631
- pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).upper().strip()
 
 
 
632
  pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
633
  pos_lyrics_sum = pos_lyrics if pos_lyrics == "" else summarize_text(pos_lyrics)
634
  neg = f"Sexual, Textual, Labeled, Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
635
  q = "\""
636
- pos = f'Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural, made for the { pos_genre } SONG "{ pos_song }"{ pos_lyrics_sum if pos_lyrics_sum == "" else ": " + q + pos_lyrics_sum + q }.'
637
 
638
  print(f"""
639
  Positive: {pos}
@@ -641,7 +644,16 @@ def handle_generate(artist,song,genre,lyrics):
641
  Negative: {neg}
642
  """)
643
 
644
- return all_pipes(pos,neg,pos_artist,pos_song)[0]
 
 
 
 
 
 
 
 
 
645
 
646
  # entry
647
 
@@ -683,7 +695,7 @@ if __name__ == "__main__":
683
  run = gr.Button("Generate",elem_classes="btn")
684
 
685
  run.click(
686
- fn=handle_generate,
687
  inputs=[artist,song,genre,lyrics],
688
  outputs=[cover]
689
  )
 
31
  from safetensors.torch import load_file, save_file
32
  from diffusers import FluxPipeline
33
  from PIL import Image, ImageDraw, ImageFont
34
+ from transformers import PegasusForConditionalGeneration, PegasusTokenizer
35
  from refiners.fluxion.utils import manual_seed
36
  from refiners.foundationals.latent_diffusion import Solver, solvers
37
  from refiners.foundationals.latent_diffusion.stable_diffusion_1.multi_upscaler import (
 
434
  # precision data
435
 
436
  seq=512
437
+ width=1024
438
+ height=1024
439
  image_steps=8
440
  img_accu=0
441
 
 
502
  image_pipe.enable_vae_slicing()
503
  image_pipe.enable_vae_tiling()
504
 
505
+ pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
506
+ pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum").to(device)
507
 
508
  # functionality
509
 
 
512
  prompt: str = "Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural.",
513
  negative_prompt: str = "Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects.",
514
  seed: int = int(str(random.random()).split(".")[1]),
515
+ upscale_factor: int = 2,
516
  controlnet_scale: float = 0.6,
517
  controlnet_decay: float = 1.0,
518
  condition_scale: int = 6,
519
  tile_width: int = 112,
520
  tile_height: int = 144,
521
  denoise_strength: float = 0.35,
522
+ num_inference_steps: int = 30,
523
  solver: str = "DDIM",
524
  ) -> Image.Image:
525
 
 
542
  tile_size=(tile_height, tile_width),
543
  denoise_strength=denoise_strength,
544
  num_inference_steps=num_inference_steps,
545
+ loras_scale={"more_details": 1.0, "sdxl_render": 1.0},
546
  solver_type=solver_type,
547
  )
548
 
 
557
  summary = pegasus_tokenizer.decode( pegasus_model.generate(
558
  pegasus_tokenizer(text,return_tensors="pt").input_ids,
559
  max_length=max_length,
560
+ num_beams = 2,
561
+ truncation=True,
562
+ padding='longest',
563
  early_stopping=True
564
  )[0], skip_special_tokens=True)
565
  log(f'RET summarize_text with summary as {summary}')
 
610
 
611
  return img
612
 
613
+ @spaces.GPU(duration=300)
614
  def all_pipes(pos,neg,artist,song):
615
+
616
  imgs = pipe_generate_image(pos,neg)
617
 
 
618
  index = 1
619
+ for i in range(len(imgs)):
620
+ imgs[i] = upscaler(imgs[i])
 
 
 
 
 
621
 
622
+ return imgs
623
+
624
+ def handle_generation(artist,song,genre,lyrics):
625
 
626
  log(f'CALL handle_generate')
627
 
628
+ pos_artist = re.sub("([ \t\n]){1,}", " ", artist).upper().strip()
629
+ pos_song = re.sub("([ \t\n]){1,}", " ", song).lower().strip()
630
  pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
631
+
632
+ pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).lower().strip()
633
+ pos_genre = ' '.join(word[0].upper() + word[1:] for word in pos_genre.split())
634
+
635
  pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
636
  pos_lyrics_sum = pos_lyrics if pos_lyrics == "" else summarize_text(pos_lyrics)
637
  neg = f"Sexual, Textual, Labeled, Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
638
  q = "\""
639
+ pos = f'Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural, made for the { pos_genre } song "{ pos_song }"{ pos_lyrics_sum if pos_lyrics_sum == "" else ": " + q + pos_lyrics_sum + q }.'
640
 
641
  print(f"""
642
  Positive: {pos}
 
644
  Negative: {neg}
645
  """)
646
 
647
+ imgs = all_pipes(pos,neg,pos_artist,pos_song)
648
+
649
+ names = []
650
+ for img in imgs:
651
+ labeled_img = add_song_cover_text(img,artist,song,height*4,width*4)
652
+ name = f'{artist} - {song} ({index}).png'
653
+ labeled_img.save(name)
654
+ names.append(name)
655
+
656
+ return names
657
 
658
  # entry
659
 
 
695
  run = gr.Button("Generate",elem_classes="btn")
696
 
697
  run.click(
698
+ fn=handle_generation,
699
  inputs=[artist,song,genre,lyrics],
700
  outputs=[cover]
701
  )