Update app.py
app.py
CHANGED
@@ -1,83 +1,93 @@
import gradio as gr
import torch
import random
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler

# Load Florence-2 (image captioning)
device = "cuda" if torch.cuda.is_available() else "cpu"
florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)

# English → Korean translator
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ko", device=0 if torch.cuda.is_available() else -1)

# Load Stable Diffusion 3.5 TurboX
model_repo = "tensorart/stable-diffusion-3.5-large-TurboX"
pipe = DiffusionPipeline.from_pretrained(
    model_repo,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo, subfolder="scheduler", shift=5)
pipe = pipe.to(device)

MAX_SEED = 2**31 - 1

def generate_prompt(image):
    """Image → English caption → Korean translation, with cartoon-style keywords appended."""
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=512,
        num_beams=3
    )
    generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task="<MORE_DETAILED_CAPTION>",
        image_size=(image.width, image.height)
    )
    prompt_en = parsed_answer["<MORE_DETAILED_CAPTION>"]

    # Translate English → Korean, then add cartoon-style keywords
    translated = translator(prompt_en, max_length=256)[0]['translation_text']
    cartoon_prompt = f"{translated}, cartoon style, bright colors, digital illustration, cute mood"
    return cartoon_prompt

def generate_image(prompt, seed=42, randomize_seed=False):
    """Text prompt → generated image."""
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=prompt,
        negative_prompt="distorted face, strange hands, blurry, low resolution",
        guidance_scale=1.5,
        num_inference_steps=8,
        width=768,
        height=768,
        generator=generator
    ).images[0]
    return image, seed

# Gradio UI definition
with gr.Blocks() as demo:
    gr.Markdown("# Image → Korean Prompt → Cartoon-Style Image Generator")

    gr.Markdown("**How to use**\n"
                "- Upload an image on the left.\n"
                "- An English prompt is generated automatically, translated into Korean, and cartoon-style keywords are applied.\n"
                "- The image generated from that prompt is shown on the right.\n")

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="Upload source image")
            run_button = gr.Button("Analyze image and generate")

        with gr.Column():
            prompt_out = gr.Textbox(label="Auto-generated prompt (Korean)", lines=3, show_copy_button=True)
            output_img = gr.Image(label="Generated cartoon image")

    def full_process(img):
        # Caption the image, build the Korean cartoon prompt, then generate with a random seed
        prompt = generate_prompt(img)
        image, seed = generate_image(prompt, randomize_seed=True)
        return prompt, image

    run_button.click(fn=full_process, inputs=[input_img], outputs=[prompt_out, output_img])

demo.launch()
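For reference, a minimal sketch of the requirements this app would need when deployed as a Space. The exact package list is an assumption and is not part of this commit; Florence-2's trust_remote_code modules and the Helsinki-NLP OPUS-MT tokenizer typically pull in einops, timm, and sentencepiece in addition to the directly imported libraries.

# requirements.txt (assumed, not part of this commit)
gradio
torch
transformers
diffusers
Pillow
sentencepiece   # tokenizer dependency for Helsinki-NLP/opus-mt-* models
einops          # used by Florence-2's remote code
timm            # used by Florence-2's remote code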