vivaceailab committed on
Commit
d44ef31
·
verified ·
1 Parent(s): 99d2bfa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -83
app.py CHANGED
@@ -1,83 +1,93 @@
1
- # ํ†ตํ•ฉ๋œ app.py ์˜ˆ์‹œ (ํ•„์ˆ˜ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋Š” requirements.txt์— ํฌํ•จ ํ•„์š”)
2
- import gradio as gr
3
- import torch
4
- import random
5
- from PIL import Image
6
- from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
7
- from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler
8
-
9
# Device selection: use CUDA when it is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load Florence-2 (image -> English description).
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository; consider pinning a `revision` so that code cannot change silently.
florence_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Florence-2-base",
    trust_remote_code=True,
)
florence_model = florence_model.to(device).eval()
florence_processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base",
    trust_remote_code=True,
)

# Translator (English -> Korean); the pipeline takes a device index, -1 for CPU.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-en-ko",
    device=0 if device == "cuda" else -1,
)

# Load the SD 3.5 model: half precision on GPU, full precision on CPU.
pipe = DiffusionPipeline.from_pretrained(
    "tensorart/stable-diffusion-3.5-large-TurboX",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    "tensorart/stable-diffusion-3.5-large-TurboX",
    subfolder="scheduler",
    shift=5,
)
pipe = pipe.to(device)

# Upper bound used by generate_image() when it draws a random seed.
MAX_SEED = 2**31 - 1
25
-
26
def generate_prompt(image):
    """Caption an image with Florence-2 and build a Korean cartoon-style prompt.

    The image is captioned through the ``<MORE_DETAILED_CAPTION>`` task, the
    English caption is run through the module-level ``translator`` pipeline,
    and a fixed list of cartoon-style keywords is appended.

    Args:
        image: A ``PIL.Image.Image``, or an array accepted by
            ``Image.fromarray``.

    Returns:
        str: The translated caption followed by the style keywords.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing to caption).
    """
    if image is None:
        raise ValueError("generate_prompt() requires an image, got None")
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # NOTE(review): presumably the Florence-2 processor expects 3-channel input;
    # converting here keeps RGBA / grayscale uploads from reaching it as-is.
    image = image.convert("RGB")

    task = "<MORE_DETAILED_CAPTION>"
    inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=512,
        num_beams=3,
    )
    # Decode with special tokens kept, then let the processor's task-specific
    # post-processing extract the caption.
    generated_text = florence_processor.batch_decode(
        generated_ids, skip_special_tokens=False
    )[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height),
    )
    prompt_en = parsed_answer[task]

    translated = translator(prompt_en, max_length=256)[0]["translation_text"]
    # Style keywords restored from the mis-encoded literal:
    # "cartoon style, digital illustration, bright colours".
    return f"{translated}, 카툰 스타일, 디지털 일러스트, 밝은 색감"
48
-
49
# Default negative prompt, restored from the mis-encoded literal:
# "distortion, strange hands, unrealistic structure".
_DEFAULT_NEGATIVE_PROMPT = "왜곡, 이상한 손, 비현실적 구조"


def generate_image(
    prompt,
    seed=42,
    randomize_seed=False,
    *,
    negative_prompt=_DEFAULT_NEGATIVE_PROMPT,
    guidance_scale=1.5,
    num_inference_steps=8,
    width=768,
    height=768,
):
    """Generate one image from a text prompt with the module-level ``pipe``.

    Args:
        prompt: Text prompt passed to the diffusion pipeline.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed from ``[0, MAX_SEED]``.
        negative_prompt: Negative prompt passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of sampling steps.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        tuple: ``(image, seed)`` where ``image`` is the first image returned by
        the pipeline and ``seed`` is the integer seed actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() takes an integer, so coerce whatever numeric type was passed.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )
    return result.images[0], seed
63
-
64
# Gradio UI: source image and button on the left, prompt and result on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🖼 이미지 기반 자동 프롬프트 생성 및 한국어 카툰 스타일 변환")

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="왼쪽: 원본 이미지")
            run_button = gr.Button("👉 프롬프트 생성 및 이미지 변환")

        with gr.Column():
            prompt_out = gr.Textbox(
                label="자동 생성된 한국어 프롬프트",
                lines=3,
                show_copy_button=True,
            )
            output_img = gr.Image(label="오른쪽: 생성된 카툰 스타일 이미지")

    def full_process(img):
        """Run captioning and image generation for one uploaded image.

        Args:
            img: Value of the input image component.

        Returns:
            tuple: ``(prompt, image)`` for the prompt textbox and output image.

        Raises:
            gr.Error: If no image was uploaded.
        """
        # The image component yields None when nothing has been uploaded;
        # report that in the UI instead of failing inside PIL.
        if img is None:
            raise gr.Error("이미지를 먼저 업로드하세요.")
        prompt = generate_prompt(img)
        # The seed that was drawn is not displayed anywhere, so it is discarded.
        image, _ = generate_image(prompt, randomize_seed=True)
        return prompt, image

    run_button.click(
        fn=full_process,
        inputs=[input_img],
        outputs=[prompt_out, output_img],
    )

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import random
4
+ from PIL import Image
5
+ from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
6
+ from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler
7
+
8
# Load Florence-2 (image captioning).
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository; consider pinning a `revision` so that code cannot change silently.
device = "cuda" if torch.cuda.is_available() else "cpu"
florence_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Florence-2-base",
    trust_remote_code=True,
)
florence_model = florence_model.to(device).eval()
florence_processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base",
    trust_remote_code=True,
)

# English -> Korean translator; the pipeline takes a device index, -1 for CPU.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-en-ko",
    device=0 if device == "cuda" else -1,
)

# Load Stable Diffusion TurboX: half precision on GPU, full precision on CPU.
model_repo = "tensorart/stable-diffusion-3.5-large-TurboX"
pipe = DiffusionPipeline.from_pretrained(
    model_repo,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    model_repo,
    subfolder="scheduler",
    shift=5,
)
pipe = pipe.to(device)

# Upper bound used by generate_image() when it draws a random seed.
MAX_SEED = 2**31 - 1
26
+
27
def generate_prompt(image):
    """Caption an image with Florence-2 and build a Korean cartoon-style prompt.

    Pipeline: image -> English description (``<MORE_DETAILED_CAPTION>`` task)
    -> translation through the module-level ``translator`` -> fixed
    cartoon-style keywords appended.

    Args:
        image: A ``PIL.Image.Image``, or an array accepted by
            ``Image.fromarray``.

    Returns:
        str: The translated caption followed by the style keywords.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing to caption).
    """
    if image is None:
        raise ValueError("generate_prompt() requires an image, got None")
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # NOTE(review): presumably the Florence-2 processor expects 3-channel input;
    # converting here keeps RGBA / grayscale uploads from reaching it as-is.
    image = image.convert("RGB")

    task = "<MORE_DETAILED_CAPTION>"
    inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=512,
        num_beams=3,
    )
    # Decode with special tokens kept, then let the processor's task-specific
    # post-processing extract the caption.
    generated_text = florence_processor.batch_decode(
        generated_ids, skip_special_tokens=False
    )[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height),
    )
    prompt_en = parsed_answer[task]

    # Translate English -> Korean, then append the cartoon-style keywords
    # (restored from the mis-encoded literal: "cartoon style, bright colours,
    # digital illustration, cute mood").
    translated = translator(prompt_en, max_length=256)[0]["translation_text"]
    return f"{translated}, 카툰 스타일, 밝은 색감, 디지털 일러스트, 귀여운 분위기"
51
+
52
# Default negative prompt, restored from the mis-encoded literal:
# "distorted face, strange hands, blur, low resolution".
_DEFAULT_NEGATIVE_PROMPT = "왜곡된 얼굴, 이상한 손, 흐림, 저해상도"


def generate_image(
    prompt,
    seed=42,
    randomize_seed=False,
    *,
    negative_prompt=_DEFAULT_NEGATIVE_PROMPT,
    guidance_scale=1.5,
    num_inference_steps=8,
    width=768,
    height=768,
):
    """Generate one image from a text prompt with the module-level ``pipe``.

    Args:
        prompt: Text prompt passed to the diffusion pipeline.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed from ``[0, MAX_SEED]``.
        negative_prompt: Negative prompt passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of sampling steps.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        tuple: ``(image, seed)`` where ``image`` is the first image returned by
        the pipeline and ``seed`` is the integer seed actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() takes an integer, so coerce whatever numeric type was passed.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )
    return result.images[0], seed
67
+
68
# Gradio UI definition: upload and button on the left, prompt and result on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🖼 이미지 → 한국어 프롬프트 → 카툰 스타일 이미지 생성기")

    # Usage notes shown to the user (Korean).
    gr.Markdown(
        "**📌 사용법 안내 (한국어)**\n"
        "- 왼쪽에 이미지를 업로드하세요.\n"
        "- 자동으로 영어 프롬프트를 생성한 후, 한국어로 번역하고 카툰 스타일을 적용합니다.\n"
        "- 오른쪽에 해당 프롬프트로 생성된 이미지가 출력됩니다.\n"
    )

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="🎨 원본 이미지 업로드")
            run_button = gr.Button("✨ 이미지 분석 및 생성 시작")

        with gr.Column():
            prompt_out = gr.Textbox(
                label="📝 자동 생성된 프롬프트 (한국어)",
                lines=3,
                show_copy_button=True,
            )
            output_img = gr.Image(label="🎉 생성된 카툰 이미지")

    def full_process(img):
        """Run captioning and image generation for one uploaded image.

        Args:
            img: Value of the input image component.

        Returns:
            tuple: ``(prompt, image)`` for the prompt textbox and output image.

        Raises:
            gr.Error: If no image was uploaded.
        """
        # The image component yields None when nothing has been uploaded;
        # report that in the UI instead of failing inside PIL.
        if img is None:
            raise gr.Error("이미지를 먼저 업로드하세요.")
        prompt = generate_prompt(img)
        # The seed that was drawn is not displayed anywhere, so it is discarded.
        image, _ = generate_image(prompt, randomize_seed=True)
        return prompt, image

    run_button.click(
        fn=full_process,
        inputs=[input_img],
        outputs=[prompt_out, output_img],
    )

demo.launch()