vivaceailab committed on
Commit
7ab20d4
·
verified ·
1 Parent(s): cc7852d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -19
app.py CHANGED
@@ -1,32 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
  import random
4
  from PIL import Image
5
  from transformers import AutoProcessor, AutoModelForCausalLM
6
- from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler
 
 
 
 
 
 
 
 
 
7
 
8
- # Florence-2 λ‘œλ“œ
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
11
- florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Stable Diffusion TurboX λ‘œλ“œ
14
  model_repo = "tensorart/stable-diffusion-3.5-large-TurboX"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  pipe = DiffusionPipeline.from_pretrained(
16
  model_repo,
17
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 
 
 
 
 
 
 
 
 
18
  )
19
- pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo, subfolder="scheduler", shift=5)
20
- pipe = pipe.to(device)
21
 
22
  MAX_SEED = 2**31 - 1
23
 
24
  def pseudo_translate_to_korean_style(en_prompt: str) -> str:
25
- # λ²ˆμ—­ 없이 μŠ€νƒ€μΌ 적용
26
- return f"이 μž₯면은 {en_prompt} μž₯λ©΄μž…λ‹ˆλ‹€. 밝고 κ·€μ—¬μš΄ 카툰 μŠ€νƒ€μΌλ‘œ κ·Έλ €μ£Όμ„Έμš”. λ””μ§€ν„Έ 일러슀트 λŠλ‚ŒμœΌλ‘œ λ¬˜μ‚¬ν•΄ μ£Όμ„Έμš”."
27
 
28
  def generate_prompt(image):
29
- """이미지 β†’ μ˜μ–΄ μ„€λͺ… β†’ ν•œκ΅­μ–΄ ν”„λ‘¬ν”„νŠΈ μŠ€νƒ€μΌλ‘œ λ³€ν™˜"""
30
  if not isinstance(image, Image.Image):
31
  image = Image.fromarray(image)
32
 
@@ -44,19 +136,15 @@ def generate_prompt(image):
44
  image_size=(image.width, image.height)
45
  )
46
  prompt_en = parsed_answer["<MORE_DETAILED_CAPTION>"]
47
-
48
- # λ²ˆμ—­κΈ° 없이 μŠ€νƒ€μΌ 적용
49
  cartoon_prompt = pseudo_translate_to_korean_style(prompt_en)
50
  return cartoon_prompt
51
 
52
  def generate_image(prompt, seed=42, randomize_seed=False):
53
- """ν…μŠ€νŠΈ ν”„λ‘¬ν”„νŠΈ β†’ 이미지 생성"""
54
  if randomize_seed:
55
  seed = random.randint(0, MAX_SEED)
56
  generator = torch.Generator().manual_seed(seed)
57
  image = pipe(
58
  prompt=prompt,
59
- negative_prompt="μ™œκ³‘λœ 손, 흐림, μ΄μƒν•œ μ–Όκ΅΄",
60
  guidance_scale=1.5,
61
  num_inference_steps=8,
62
  width=768,
@@ -65,10 +153,9 @@ def generate_image(prompt, seed=42, randomize_seed=False):
65
  ).images[0]
66
  return image, seed
67
 
68
- # Gradio UI ꡬ성
69
  with gr.Blocks() as demo:
70
  gr.Markdown("# πŸ–Ό 이미지 β†’ μ„€λͺ… 생성 β†’ 카툰 이미지 μžλ™ 생성기")
71
-
72
  gr.Markdown("**πŸ“Œ μ‚¬μš©λ²• μ•ˆλ‚΄ (ν•œκ΅­μ–΄)**\n"
73
  "- μ™Όμͺ½μ— 이미지λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”.\n"
74
  "- AIκ°€ μ˜μ–΄ μ„€λͺ…을 λ§Œλ“€κ³ , λ‚΄λΆ€μ—μ„œ ν•œκ΅­μ–΄ μŠ€νƒ€μΌ ν”„λ‘¬ν”„νŠΈλ‘œ μž¬κ΅¬μ„±ν•©λ‹ˆλ‹€.\n"
@@ -78,7 +165,6 @@ with gr.Blocks() as demo:
78
  with gr.Column():
79
  input_img = gr.Image(label="🎨 원본 이미지 μ—…λ‘œλ“œ")
80
  run_button = gr.Button("✨ 생성 μ‹œμž‘")
81
-
82
  with gr.Column():
83
  prompt_out = gr.Textbox(label="πŸ“ μŠ€νƒ€μΌ 적용된 ν”„λ‘¬ν”„νŠΈ", lines=3, show_copy_button=True)
84
  output_img = gr.Image(label="πŸŽ‰ μƒμ„±λœ 이미지")
 
1
import os
import sys
import types
import importlib
import importlib.machinery

from huggingface_hub import snapshot_download

# Disable hf_transfer so downloads use the plain HTTP backend (avoids a hard
# dependency on the optional hf_transfer package).
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"

# Pin Florence-2 to a known-good commit so remote code does not change under us.
REVISION = "ceaf371f01ef66192264811b390bccad475a4f02"

# Pre-fetch model snapshots into the local HF cache; returns the cache paths.
LOCAL_FLORENCE = snapshot_download(
    repo_id="microsoft/Florence-2-base",
    revision=REVISION,
)

LOCAL_TURBOX = snapshot_download(
    repo_id="tensorart/stable-diffusion-3.5-large-TurboX",
)

# BUG FIX: this was a second snapshot_download() of the exact same
# repo/revision as LOCAL_FLORENCE — redundant network round-trips for an
# identical cache path.  Alias it instead; the public name is preserved.
LOCAL_FLORENCE_DIR = LOCAL_FLORENCE

# Install an empty stub module for flash_attn so `import flash_attn` inside
# the model's remote code succeeds without the real (GPU-only) package.
spec = importlib.machinery.ModuleSpec("flash_attn", loader=None)
mod = types.ModuleType("flash_attn")
mod.__spec__ = spec
sys.modules["flash_attn"] = mod

# Compatibility shim: older diffusers/transformers code paths still call
# huggingface_hub.cached_download, removed in recent hub releases.
import huggingface_hub as _hf_hub
_hf_hub.cached_download = _hf_hub.hf_hub_download
32
+
33
  import gradio as gr
34
  import torch
35
  import random
36
  from PIL import Image
37
  from transformers import AutoProcessor, AutoModelForCausalLM
38
+ from transformers import (
39
+ CLIPTextModel,
40
+ CLIPTokenizer,
41
+ CLIPFeatureExtractor,
42
+ )
43
+ import diffusers
44
+ from diffusers import StableDiffusionPipeline
45
+ from diffusers import DiffusionPipeline
46
+ from diffusers import EulerDiscreteScheduler as FlowMatchEulerDiscreteScheduler
47
+ from diffusers import UNet2DConditionModel
48
 
49
# Bypass flash-attention: make transformers believe flash_attn is unavailable
# so model remote code falls back to SDPA attention.
import transformers.utils.import_utils as _import_utils
from transformers.utils import is_flash_attn_2_available

# BUG FIX: the original override was `lambda pkg: False`, which reported
# *every* package as missing and broke transformers' availability checks for
# torch, accelerate, etc.  Delegate to the real check and only deny flash_attn.
_orig_is_package_available = _import_utils._is_package_available
_import_utils._is_package_available = lambda pkg, *a, **k: (
    False if str(pkg).startswith("flash_attn")
    else _orig_is_package_available(pkg, *a, **k)
)
_import_utils.is_flash_attn_2_available = lambda: False

hf_utils = importlib.import_module('transformers.utils')
hf_utils.is_flash_attn_2_available = lambda *a, **k: False
hf_utils.is_flash_attn_greater_or_equal_2_10 = lambda *a, **k: False

# Some Florence-2 remote code expects these SDPA mask helpers; provide no-op
# fallbacks on transformers versions that lack them.
mask_utils = importlib.import_module("transformers.modeling_attn_mask_utils")
for fn in ("_prepare_4d_attention_mask_for_sdpa", "_prepare_4d_causal_attention_mask_for_sdpa"):
    if not hasattr(mask_utils, fn):
        setattr(mask_utils, fn, lambda *a, **k: None)

# Force every PretrainedConfig to report the SDPA attention implementation,
# regardless of what the checkpoint's config requests (e.g. flash_attention_2).
cfg_mod = importlib.import_module("transformers.configuration_utils")
_PrC = cfg_mod.PretrainedConfig
_orig_getattr = _PrC.__getattribute__
def _getattr(self, name):
    if name == "_attn_implementation":
        return "sdpa"
    return _orig_getattr(self, name)
_PrC.__getattribute__ = _getattr
72
 
 
73
# --- Stable Diffusion 3.5 TurboX + Florence-2 loading ---
model_repo = "tensorart/stable-diffusion-3.5-large-TurboX"
device = "cuda" if torch.cuda.is_available() else "cpu"
# BUG FIX: fp16 was forced unconditionally; on CPU-only hosts fp16 inference
# is unsupported/extremely slow, so fall back to fp32 (as the pre-existing
# code did).
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# NOTE(review): this standalone scheduler is never attached to the pipeline
# (pipe.scheduler is re-assigned below with shift=5) — confirm it is used by
# out-of-view code before removing.
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    model_repo,
    subfolder="scheduler",
    torch_dtype=dtype,
)

# NOTE(review): text_encoder/tokenizer/feature_extractor/unet are loaded here
# but not passed into DiffusionPipeline.from_pretrained below; they cost
# memory. Kept in case out-of-view code references them.
text_encoder = CLIPTextModel.from_pretrained(
    model_repo, subfolder="text_encoder", torch_dtype=dtype
)
tokenizer = CLIPTokenizer.from_pretrained(model_repo, subfolder="tokenizer")
feature_extractor = CLIPFeatureExtractor.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="feature_extractor"
)

unet = UNet2DConditionModel.from_pretrained(
    model_repo, subfolder="unet", torch_dtype=dtype
)

# Florence-2 captioner: loaded from the pinned local snapshot and kept on CPU.
florence_model = AutoModelForCausalLM.from_pretrained(
    LOCAL_FLORENCE, trust_remote_code=True, torch_dtype=dtype
)
florence_model.to("cpu")
florence_model.eval()
florence_processor = AutoProcessor.from_pretrained(
    LOCAL_FLORENCE, trust_remote_code=True
)

# HACK: alias the SD3 pipeline class so code resolving StableDiffusion3Pipeline
# gets the classic StableDiffusionPipeline instead.
diffusers.StableDiffusion3Pipeline = StableDiffusionPipeline
pipe = DiffusionPipeline.from_pretrained(
    model_repo,
    torch_dtype=dtype,
    trust_remote_code=True,
    safety_checker=None,
    feature_extractor=None
)
# BUG FIX: was hard-coded `pipe.to("cuda")`, which crashed on CPU-only hosts
# even though `device` is computed above.
pipe = pipe.to(device)

# Re-create the scheduler with shift=5 (TurboX's recommended timestep shift);
# local_files_only is safe because the repo snapshot was just downloaded.
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    model_repo, subfolder="scheduler", local_files_only=True,
    trust_remote_code=True, shift=5
)
 
 
115
 
116
# Largest seed value offered to users (fits in a signed 32-bit int).
MAX_SEED = 2**31 - 1

def pseudo_translate_to_korean_style(en_prompt: str) -> str:
    """Wrap an English caption in the app's cartoon-style prompt template.

    NOTE: despite the historical name, no translation happens — the
    returned prompt stays in English.
    """
    return "Cartoon styled {} handsome or pretty people".format(en_prompt)
 
120
 
121
  def generate_prompt(image):
 
122
  if not isinstance(image, Image.Image):
123
  image = Image.fromarray(image)
124
 
 
136
  image_size=(image.width, image.height)
137
  )
138
  prompt_en = parsed_answer["<MORE_DETAILED_CAPTION>"]
 
 
139
  cartoon_prompt = pseudo_translate_to_korean_style(prompt_en)
140
  return cartoon_prompt
141
 
142
  def generate_image(prompt, seed=42, randomize_seed=False):
 
143
  if randomize_seed:
144
  seed = random.randint(0, MAX_SEED)
145
  generator = torch.Generator().manual_seed(seed)
146
  image = pipe(
147
  prompt=prompt,
 
148
  guidance_scale=1.5,
149
  num_inference_steps=8,
150
  width=768,
 
153
  ).images[0]
154
  return image, seed
155
 
156
+ # Gradio UI
157
  with gr.Blocks() as demo:
158
  gr.Markdown("# πŸ–Ό 이미지 β†’ μ„€λͺ… 생성 β†’ 카툰 이미지 μžλ™ 생성기")
 
159
  gr.Markdown("**πŸ“Œ μ‚¬μš©λ²• μ•ˆλ‚΄ (ν•œκ΅­μ–΄)**\n"
160
  "- μ™Όμͺ½μ— 이미지λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”.\n"
161
  "- AIκ°€ μ˜μ–΄ μ„€λͺ…을 λ§Œλ“€κ³ , λ‚΄λΆ€μ—μ„œ ν•œκ΅­μ–΄ μŠ€νƒ€μΌ ν”„λ‘¬ν”„νŠΈλ‘œ μž¬κ΅¬μ„±ν•©λ‹ˆλ‹€.\n"
 
165
  with gr.Column():
166
  input_img = gr.Image(label="🎨 원본 이미지 μ—…λ‘œλ“œ")
167
  run_button = gr.Button("✨ 생성 μ‹œμž‘")
 
168
  with gr.Column():
169
  prompt_out = gr.Textbox(label="πŸ“ μŠ€νƒ€μΌ 적용된 ν”„λ‘¬ν”„νŠΈ", lines=3, show_copy_button=True)
170
  output_img = gr.Image(label="πŸŽ‰ μƒμ„±λœ 이미지")