Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,201 +1,56 @@
|
|
1 |
import gradio as gr
|
2 |
-
import numpy as np
|
3 |
-
import random
|
4 |
-
|
5 |
-
import spaces
|
6 |
-
from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler
|
7 |
import torch
|
|
|
|
|
8 |
|
|
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
-
model_repo_id = "tensorart/stable-diffusion-3.5-large-TurboX"
|
11 |
-
|
12 |
-
if torch.cuda.is_available():
|
13 |
-
torch_dtype = torch.float16
|
14 |
-
else:
|
15 |
-
torch_dtype = torch.float32
|
16 |
-
|
17 |
-
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
18 |
-
|
19 |
-
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder="scheduler", shift=5)
|
20 |
-
|
21 |
-
pipe = pipe.to(device)
|
22 |
-
|
23 |
-
MAX_SEED = np.iinfo(np.int32).max
|
24 |
-
MAX_IMAGE_SIZE = 1024
|
25 |
-
|
26 |
-
@spaces.GPU(duration=65)
|
27 |
-
def infer(
|
28 |
-
prompt,
|
29 |
-
negative_prompt="",
|
30 |
-
seed=42,
|
31 |
-
randomize_seed=False,
|
32 |
-
width=1024,
|
33 |
-
height=1024,
|
34 |
-
guidance_scale=1.5,
|
35 |
-
num_inference_steps=8,
|
36 |
-
progress=gr.Progress(track_tqdm=True),
|
37 |
-
):
|
38 |
-
if randomize_seed:
|
39 |
-
seed = random.randint(0, MAX_SEED)
|
40 |
-
|
41 |
-
generator = torch.Generator().manual_seed(seed)
|
42 |
-
|
43 |
-
image = pipe(
|
44 |
-
prompt=prompt,
|
45 |
-
negative_prompt=negative_prompt,
|
46 |
-
guidance_scale=guidance_scale,
|
47 |
-
num_inference_steps=num_inference_steps,
|
48 |
-
width=width,
|
49 |
-
height=height,
|
50 |
-
generator=generator,
|
51 |
-
).images[0]
|
52 |
-
|
53 |
-
return image, seed
|
54 |
-
|
55 |
-
|
56 |
-
examples = [
|
57 |
-
"A capybara wearing a suit holding a sign that reads Hello World",
|
58 |
-
"A serene mountain lake at sunset with cherry blossoms floating on the water",
|
59 |
-
"A magical crystal dragon with iridescent scales in a glowing forest",
|
60 |
-
"A Victorian steampunk teapot with intricate brass gears and rose gold accents",
|
61 |
-
"A futuristic neon cityscape with flying cars and holographic billboards",
|
62 |
-
"A red panda painter creating a masterpiece with tiny paws in an art studio",
|
63 |
-
]
|
64 |
-
|
65 |
-
css = """
|
66 |
-
body {
|
67 |
-
background: linear-gradient(135deg, #f9e2e6 0%, #e8f3fc 50%, #e2f9f2 100%);
|
68 |
-
background-attachment: fixed;
|
69 |
-
min-height: 100vh;
|
70 |
-
}
|
71 |
-
|
72 |
-
#col-container {
|
73 |
-
margin: 0 auto;
|
74 |
-
max-width: 640px;
|
75 |
-
background-color: rgba(255, 255, 255, 0.85);
|
76 |
-
border-radius: 16px;
|
77 |
-
box-shadow: 0 8px 16px rgba(0, 0, 0, 0.1);
|
78 |
-
padding: 24px;
|
79 |
-
backdrop-filter: blur(10px);
|
80 |
-
}
|
81 |
-
|
82 |
-
.gradio-container {
|
83 |
-
background: transparent !important;
|
84 |
-
}
|
85 |
-
|
86 |
-
.gr-button-primary {
|
87 |
-
background: linear-gradient(90deg, #6b9dfc, #8c6bfc) !important;
|
88 |
-
border: none !important;
|
89 |
-
transition: all 0.3s ease;
|
90 |
-
}
|
91 |
-
|
92 |
-
.gr-button-primary:hover {
|
93 |
-
transform: translateY(-2px);
|
94 |
-
box-shadow: 0 5px 15px rgba(108, 99, 255, 0.3);
|
95 |
-
}
|
96 |
-
|
97 |
-
.gr-form {
|
98 |
-
border-radius: 12px;
|
99 |
-
background-color: rgba(255, 255, 255, 0.7);
|
100 |
-
}
|
101 |
-
|
102 |
-
.gr-accordion {
|
103 |
-
border-radius: 12px;
|
104 |
-
overflow: hidden;
|
105 |
-
}
|
106 |
-
|
107 |
-
h1 {
|
108 |
-
background: linear-gradient(90deg, #6b9dfc, #8c6bfc);
|
109 |
-
-webkit-background-clip: text;
|
110 |
-
-webkit-text-fill-color: transparent;
|
111 |
-
font-weight: 800;
|
112 |
-
}
|
113 |
-
"""
|
114 |
-
|
115 |
-
with gr.Blocks(theme="apriel", css=css) as demo:
|
116 |
-
with gr.Column(elem_id="col-container"):
|
117 |
-
gr.Markdown(" # TensorArt Stable Diffusion 3.5 Large TurboX")
|
118 |
-
gr.Markdown("[8-step distilled turbo model](https://huggingface.co/tensorart/stable-diffusion-3.5-large-TurboX)")
|
119 |
-
with gr.Row():
|
120 |
-
prompt = gr.Text(
|
121 |
-
label="Prompt",
|
122 |
-
show_label=False,
|
123 |
-
max_lines=1,
|
124 |
-
placeholder="Enter your prompt",
|
125 |
-
container=False,
|
126 |
-
)
|
127 |
-
|
128 |
-
run_button = gr.Button("Run", scale=0, variant="primary")
|
129 |
-
|
130 |
-
result = gr.Image(label="Result", show_label=False)
|
131 |
-
|
132 |
-
with gr.Accordion("Advanced Settings", open=False):
|
133 |
-
negative_prompt = gr.Text(
|
134 |
-
label="Negative prompt",
|
135 |
-
max_lines=1,
|
136 |
-
placeholder="Enter a negative prompt",
|
137 |
-
)
|
138 |
-
|
139 |
-
seed = gr.Slider(
|
140 |
-
label="Seed",
|
141 |
-
minimum=0,
|
142 |
-
maximum=MAX_SEED,
|
143 |
-
step=1,
|
144 |
-
value=0,
|
145 |
-
)
|
146 |
-
|
147 |
-
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
|
148 |
-
|
149 |
-
with gr.Row():
|
150 |
-
width = gr.Slider(
|
151 |
-
label="Width",
|
152 |
-
minimum=512,
|
153 |
-
maximum=MAX_IMAGE_SIZE,
|
154 |
-
step=32,
|
155 |
-
value=1024,
|
156 |
-
)
|
157 |
-
|
158 |
-
height = gr.Slider(
|
159 |
-
label="Height",
|
160 |
-
minimum=512,
|
161 |
-
maximum=MAX_IMAGE_SIZE,
|
162 |
-
step=32,
|
163 |
-
value=1024,
|
164 |
-
)
|
165 |
-
|
166 |
-
with gr.Row():
|
167 |
-
guidance_scale = gr.Slider(
|
168 |
-
label="Guidance scale",
|
169 |
-
minimum=0.0,
|
170 |
-
maximum=7.5,
|
171 |
-
step=0.1,
|
172 |
-
value=1.5,
|
173 |
-
)
|
174 |
-
|
175 |
-
num_inference_steps = gr.Slider(
|
176 |
-
label="Number of inference steps",
|
177 |
-
minimum=1,
|
178 |
-
maximum=50,
|
179 |
-
step=1,
|
180 |
-
value=8,
|
181 |
-
)
|
182 |
-
|
183 |
-
gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True, cache_mode="lazy")
|
184 |
-
gr.on(
|
185 |
-
triggers=[run_button.click, prompt.submit],
|
186 |
-
fn=infer,
|
187 |
-
inputs=[
|
188 |
-
prompt,
|
189 |
-
negative_prompt,
|
190 |
-
seed,
|
191 |
-
randomize_seed,
|
192 |
-
width,
|
193 |
-
height,
|
194 |
-
guidance_scale,
|
195 |
-
num_inference_steps,
|
196 |
-
],
|
197 |
-
outputs=[result, seed],
|
198 |
-
)
|
199 |
|
200 |
-
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
2 |
import torch
|
3 |
+
from PIL import Image
|
4 |
+
from transformers import BlipProcessor, BlipForConditionalGeneration
|
5 |
|
6 |
+
# 1. 장치 설정
|
7 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# 2. Load the BLIP processor and the captioning model
processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
# Move the weights to the device selected above (GPU when available).
model = model.to(device)
|
12 |
+
|
13 |
+
# 3. 이미지 설명 생성 함수
|
14 |
+
def generate_caption(image):
    """Generate a text caption for an image with the BLIP captioning model.

    Args:
        image: The image passed in by the Gradio upload event (the input
            component is declared with ``type="pil"``), or ``None`` when no
            image is available.

    Returns:
        str: The decoded caption with every occurrence of "Asian" replaced
        by "Korean". When ``image`` is ``None``, a Korean message asking the
        user to upload an image is returned instead.
    """
    if image is None:
        return "이미지를 업로드해주세요."

    # Resize up front for faster processing.
    image = image.resize((384, 384))

    # Generate the caption.
    inputs = processor(images=image, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_length=50)
    caption = processor.decode(output_ids[0], skip_special_tokens=True)

    # str.replace leaves the text untouched when "Asian" is absent, so no
    # membership test or separate return path is needed.
    caption = caption.replace("Asian", "Korean")
    print("✅ 생성된 설명:", caption)
    return caption
|
31 |
+
|
32 |
+
# 4. Gradio 인터페이스 구성
|
33 |
+
with gr.Blocks(title="이미지 설명 생성기") as demo:
    gr.Markdown("## 🖼️ 이미지를 업로드하면 설명이 자동 생성됩니다.")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="입력 이미지", type="pil")
        with gr.Column():
            caption_output = gr.Textbox(label="생성된 설명", lines=3, show_copy_button=True)
            # Link button built from raw HTML; it opens the caricature Space
            # in a new tab.
            # NOTE(review): the original indentation was lost, so placing this
            # inside the output column is an assumption - confirm the nesting.
            gr.HTML("""
            <div style='margin-top: 10px; text-align: center;'>
                <a href="https://huggingface.co/spaces/VIDraft/stable-diffusion-3.5-large-turboX" target="_blank" rel="noopener noreferrer">
                    <button style='padding: 10px 20px; background-color: #ff9900; color: white; border: none; border-radius: 10px; font-size: 16px; box-shadow: 2px 2px 8px rgba(0,0,0,0.3); cursor: pointer;'>
                        🎨 캐리커쳐 만들기
                    </button>
                </a>
            </div>
            """)

    # Wire the upload event to caption generation so a caption is produced
    # as soon as an image is uploaded.
    image_input.upload(fn=generate_caption, inputs=image_input, outputs=caption_output)

# 5. Launch the app
demo.launch(debug=True)
|