Spaces:
Sleeping
Sleeping
commit
Browse files
app.py
CHANGED
@@ -31,7 +31,7 @@ from huggingface_hub import hf_hub_download
|
|
31 |
from safetensors.torch import load_file, save_file
|
32 |
from diffusers import FluxPipeline
|
33 |
from PIL import Image, ImageDraw, ImageFont
|
34 |
-
from transformers import PegasusForConditionalGeneration,
|
35 |
from refiners.fluxion.utils import manual_seed
|
36 |
from refiners.foundationals.latent_diffusion import Solver, solvers
|
37 |
from refiners.foundationals.latent_diffusion.stable_diffusion_1.multi_upscaler import (
|
@@ -434,8 +434,8 @@ pegasus_name = "google/pegasus-xsum"
|
|
434 |
# precision data
|
435 |
|
436 |
seq=512
|
437 |
-
width=
|
438 |
-
height=
|
439 |
image_steps=8
|
440 |
img_accu=0
|
441 |
|
@@ -502,8 +502,8 @@ image_pipe.enable_model_cpu_offload()
|
|
502 |
image_pipe.enable_vae_slicing()
|
503 |
image_pipe.enable_vae_tiling()
|
504 |
|
505 |
-
pegasus_tokenizer =
|
506 |
-
pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
|
507 |
|
508 |
# functionality
|
509 |
|
@@ -512,14 +512,14 @@ def upscaler(
|
|
512 |
prompt: str = "Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural.",
|
513 |
negative_prompt: str = "Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects.",
|
514 |
seed: int = int(str(random.random()).split(".")[1]),
|
515 |
-
upscale_factor: int =
|
516 |
controlnet_scale: float = 0.6,
|
517 |
controlnet_decay: float = 1.0,
|
518 |
condition_scale: int = 6,
|
519 |
tile_width: int = 112,
|
520 |
tile_height: int = 144,
|
521 |
denoise_strength: float = 0.35,
|
522 |
-
num_inference_steps: int =
|
523 |
solver: str = "DDIM",
|
524 |
) -> Image.Image:
|
525 |
|
@@ -542,7 +542,7 @@ def upscaler(
|
|
542 |
tile_size=(tile_height, tile_width),
|
543 |
denoise_strength=denoise_strength,
|
544 |
num_inference_steps=num_inference_steps,
|
545 |
-
loras_scale={"more_details": 0
|
546 |
solver_type=solver_type,
|
547 |
)
|
548 |
|
@@ -557,6 +557,9 @@ def summarize_text(
|
|
557 |
summary = pegasus_tokenizer.decode( pegasus_model.generate(
|
558 |
pegasus_tokenizer(text,return_tensors="pt").input_ids,
|
559 |
max_length=max_length,
|
|
|
|
|
|
|
560 |
early_stopping=True
|
561 |
)[0], skip_special_tokens=True)
|
562 |
log(f'RET summarize_text with summary as {summary}')
|
@@ -607,33 +610,33 @@ def add_song_cover_text(img,artist,song,height,width):
|
|
607 |
|
608 |
return img
|
609 |
|
610 |
-
@spaces.GPU(duration=
|
611 |
def all_pipes(pos,neg,artist,song):
|
|
|
612 |
imgs = pipe_generate_image(pos,neg)
|
613 |
|
614 |
-
names = []
|
615 |
index = 1
|
616 |
-
for
|
617 |
-
|
618 |
-
labeled_img = add_song_cover_text(enhanced_img,artist,song,height*4,width*4)
|
619 |
-
name = f'{artist} - {song} ({index}).png'
|
620 |
-
labeled_img.save(name)
|
621 |
-
names.append(name)
|
622 |
-
return names
|
623 |
|
624 |
-
|
|
|
|
|
625 |
|
626 |
log(f'CALL handle_generate')
|
627 |
|
628 |
-
pos_artist = re.sub("([ \t\n]){1,}", " ", artist).strip()
|
629 |
-
pos_song = re.sub("([ \t\n]){1,}", " ", song).strip()
|
630 |
pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
|
631 |
-
|
|
|
|
|
|
|
632 |
pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
|
633 |
pos_lyrics_sum = pos_lyrics if pos_lyrics == "" else summarize_text(pos_lyrics)
|
634 |
neg = f"Sexual, Textual, Labeled, Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
|
635 |
q = "\""
|
636 |
-
pos = f'Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural, made for the { pos_genre }
|
637 |
|
638 |
print(f"""
|
639 |
Positive: {pos}
|
@@ -641,7 +644,16 @@ def handle_generate(artist,song,genre,lyrics):
|
|
641 |
Negative: {neg}
|
642 |
""")
|
643 |
|
644 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
|
646 |
# entry
|
647 |
|
@@ -683,7 +695,7 @@ if __name__ == "__main__":
|
|
683 |
run = gr.Button("Generate",elem_classes="btn")
|
684 |
|
685 |
run.click(
|
686 |
-
fn=
|
687 |
inputs=[artist,song,genre,lyrics],
|
688 |
outputs=[cover]
|
689 |
)
|
|
|
31 |
from safetensors.torch import load_file, save_file
|
32 |
from diffusers import FluxPipeline
|
33 |
from PIL import Image, ImageDraw, ImageFont
|
34 |
+
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
|
35 |
from refiners.fluxion.utils import manual_seed
|
36 |
from refiners.foundationals.latent_diffusion import Solver, solvers
|
37 |
from refiners.foundationals.latent_diffusion.stable_diffusion_1.multi_upscaler import (
|
|
|
434 |
# precision data
|
435 |
|
436 |
seq=512
|
437 |
+
width=1024
|
438 |
+
height=1024
|
439 |
image_steps=8
|
440 |
img_accu=0
|
441 |
|
|
|
502 |
image_pipe.enable_vae_slicing()
|
503 |
image_pipe.enable_vae_tiling()
|
504 |
|
505 |
+
pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
|
506 |
+
pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum").to(device)
|
507 |
|
508 |
# functionality
|
509 |
|
|
|
512 |
prompt: str = "Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural.",
|
513 |
negative_prompt: str = "Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects.",
|
514 |
seed: int = int(str(random.random()).split(".")[1]),
|
515 |
+
upscale_factor: int = 2,
|
516 |
controlnet_scale: float = 0.6,
|
517 |
controlnet_decay: float = 1.0,
|
518 |
condition_scale: int = 6,
|
519 |
tile_width: int = 112,
|
520 |
tile_height: int = 144,
|
521 |
denoise_strength: float = 0.35,
|
522 |
+
num_inference_steps: int = 30,
|
523 |
solver: str = "DDIM",
|
524 |
) -> Image.Image:
|
525 |
|
|
|
542 |
tile_size=(tile_height, tile_width),
|
543 |
denoise_strength=denoise_strength,
|
544 |
num_inference_steps=num_inference_steps,
|
545 |
+
loras_scale={"more_details": 1.0, "sdxl_render": 1.0},
|
546 |
solver_type=solver_type,
|
547 |
)
|
548 |
|
|
|
557 |
summary = pegasus_tokenizer.decode( pegasus_model.generate(
|
558 |
pegasus_tokenizer(text,return_tensors="pt").input_ids,
|
559 |
max_length=max_length,
|
560 |
+
num_beams = 2,
|
561 |
+
truncation=True,
|
562 |
+
padding='longest',
|
563 |
early_stopping=True
|
564 |
)[0], skip_special_tokens=True)
|
565 |
log(f'RET summarize_text with summary as {summary}')
|
|
|
610 |
|
611 |
return img
|
612 |
|
613 |
+
@spaces.GPU(duration=300)
def all_pipes(pos,neg,artist,song):
    """Generate images for a prompt pair and upscale each of them.

    Args:
        pos: Positive prompt, forwarded to ``pipe_generate_image``.
        neg: Negative prompt, forwarded to ``pipe_generate_image``.
        artist: Not used by this function; kept so existing callers still work.
        song: Not used by this function; kept so existing callers still work.

    Returns:
        A list with one upscaled image per image returned by
        ``pipe_generate_image``, in the same order.
    """
    imgs = pipe_generate_image(pos,neg)

    # Each image goes through upscaler() with its default settings.
    return [upscaler(img) for img in imgs]
|
623 |
+
|
624 |
+
def handle_generation(artist,song,genre,lyrics):
    """Build the prompts for a song, generate its cover images and save them.

    Args:
        artist: Artist name text.
        song: Song title text.
        genre: Genre text.
        lyrics: Lyrics text; when it is empty after cleaning, no summary is
            added to the positive prompt.

    Returns:
        The file names of the saved PNG images, in generation order.
    """
    log(f'CALL handle_generate')

    # Collapse whitespace runs to single spaces, then normalise the case:
    # artist in upper case, song and genre with each word capitalised.
    pos_artist = re.sub("([ \t\n]){1,}", " ", artist).upper().strip()
    pos_song = re.sub("([ \t\n]){1,}", " ", song).lower().strip()
    pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())

    pos_genre = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", genre)).lower().strip()
    pos_genre = ' '.join(word[0].upper() + word[1:] for word in pos_genre.split())

    # Lyrics are stripped of punctuation and summarised only when non-empty.
    pos_lyrics = re.sub(f'[{punctuation}]', '', re.sub("([ \t\n]){1,}", " ", lyrics)).lower().strip()
    pos_lyrics_sum = pos_lyrics if pos_lyrics == "" else summarize_text(pos_lyrics)
    neg = f"Sexual, Textual, Labeled, Distorted, Discontinuous, Blurry, Doll-Like, Overly-Plastic, Low-Quality, Painted, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
    q = "\""
    pos = f'Photorealistic, Hyperrealistic, Realistic Photography, High-Quality Photography, Natural, made for the { pos_genre } song "{ pos_song }"{ pos_lyrics_sum if pos_lyrics_sum == "" else ": " + q + pos_lyrics_sum + q }.'

    print(f"""
    Positive: {pos}

    Negative: {neg}
    """)

    imgs = all_pipes(pos,neg,pos_artist,pos_song)

    names = []
    # Number the files from 1 so every image gets its own file name and
    # later images do not overwrite earlier ones.
    for index, img in enumerate(imgs, start=1):
        # NOTE(review): all_pipes() calls upscaler() with its default
        # upscale_factor; confirm that height*4 / width*4 still matches the
        # real size of the upscaled image.
        labeled_img = add_song_cover_text(img,artist,song,height*4,width*4)
        name = f'{artist} - {song} ({index}).png'
        labeled_img.save(name)
        names.append(name)

    return names
|
657 |
|
658 |
# entry
|
659 |
|
|
|
695 |
run = gr.Button("Generate",elem_classes="btn")
|
696 |
|
697 |
run.click(
|
698 |
+
fn=handle_generation,
|
699 |
inputs=[artist,song,genre,lyrics],
|
700 |
outputs=[cover]
|
701 |
)
|