Update app.py

app.py CHANGED
@@ -3,7 +3,8 @@ import threading
 import gc
 import os
 import torch
-
+import time
+import signal
 import gradio as gr
 import spaces
 import transformers
@@ -13,24 +14,26 @@ from huggingface_hub import login
 # Settings for model memory management and optimization
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-MAX_GPU_MEMORY = 80 * 1024 * 1024 * 1024  # based on an 80GB A100
+MAX_GPU_MEMORY = 80 * 1024 * 1024 * 1024  # based on an 80GB A100
 
-# List of available models
+# List of available models - changed to start with smaller models
 available_models = {
+    "google/gemma-2b": "Google Gemma (2B)",  # smaller model set as the default
+    "mistralai/Mistral-7B-Instruct-v0.2": "Mistral 7B Instruct v0.2",
     "mistralai/Mistral-Small-3.1-24B-Base-2503": "Mistral Small 3.1 (24B)",
-    "bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF": "Mistral Small 3.1 GGUF (24B)",
     "google/gemma-3-27b-it": "Google Gemma 3 (27B)",
     "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen 2.5 Coder (32B)",
     "open-r1/OlympicCoder-32B": "Olympic Coder (32B)"
 }
 
-# Default model
+# Default model - set to the smallest model
 DEFAULT_MODEL_KEY = list(available_models.keys())[0]
 DEFAULT_MODEL_VALUE = available_models[DEFAULT_MODEL_KEY]
 
 # Global variables used for model loading
 pipe = None
 current_model_name = None
+loading_in_progress = False
 
 # Attempt to log in with the Hugging Face token
 try:
@@ -70,33 +73,33 @@ latex_delimiters = [
 # Model-size-based configuration - optimal settings defined per model size
 MODEL_CONFIG = {
     "small": {  # <10B
-        "max_memory": {0: "
+        "max_memory": {0: "10GiB"},
         "offload": False,
         "quantization": None
     },
     "medium": {  # 10B-30B
-        "max_memory": {0: "
+        "max_memory": {0: "30GiB"},
         "offload": False,
-        "quantization": None
+        "quantization": None
     },
     "large": {  # >30B
-        "max_memory": {0: "
+        "max_memory": {0: "60GiB"},
         "offload": True,
-        "quantization": None
+        "quantization": None
     }
 }
 
 def get_model_size_category(model_name):
     """Determine the model size category"""
-    if "3B" in model_name or "8B" in model_name:
+    if "2B" in model_name or "3B" in model_name or "7B" in model_name or "8B" in model_name:
         return "small"
-    elif "24B" in model_name or "27B" in model_name:
+    elif "15B" in model_name or "24B" in model_name or "27B" in model_name:
         return "medium"
     elif "32B" in model_name or "70B" in model_name:
         return "large"
     else:
-        # By default
-        return "
+        # Return "small" by default (for safety)
+        return "small"
 
 def clear_gpu_memory():
     """Free GPU memory"""
@@ -138,26 +141,36 @@ def rebuild_messages(history: list):
     messages.append({"role": h.role, "content": h.content})
     return messages
 
-def load_model(model_names):
+def load_model(model_names, status_callback=None):
     """Load the model given the selected model names (using settings optimized for the A100)"""
-    global pipe, current_model_name
-
-    # Clean up the existing model
-    clear_gpu_memory()
+    global pipe, current_model_name, loading_in_progress
 
-    # If no model was selected, fall back to the default
-    if not model_names:
-        model_name = DEFAULT_MODEL_KEY
-    else:
-        # Use the first selected model
-        model_name = model_names[0]
+    # If a load is already in progress
+    if loading_in_progress:
+        return "Another model is already loading. Please wait a moment."
 
-    size_category = get_model_size_category(model_name)
-    config = MODEL_CONFIG[size_category]
+    loading_in_progress = True
 
-    # Load the model (apply settings optimized for its size)
     try:
+        # Clean up the existing model
+        clear_gpu_memory()
+
+        # If no model was selected, fall back to the default
+        if not model_names:
+            model_name = DEFAULT_MODEL_KEY
+        else:
+            # Use the first selected model
+            model_name = model_names[0]
+
+        # Check the model size category
+        size_category = get_model_size_category(model_name)
+        config = MODEL_CONFIG[size_category]
+
+        # Update the loading status
+        if status_callback:
+            status_callback(f"Loading model '{model_name}'... (size: {size_category})")
+
+        # Load the model (apply settings optimized for its size)
         # Check the HF_TOKEN environment variable
         hf_token = os.getenv("HF_TOKEN")
         # Common parameters
@@ -166,14 +179,25 @@ def load_model(model_names):
             "trust_remote_code": True,
         }
 
-        # Use BitsAndBytes
+        # Check whether BitsAndBytes is available
         try:
            import bitsandbytes
            has_bitsandbytes = True
-            print("BitsAndBytes library loaded successfully")
         except ImportError:
             has_bitsandbytes = False
-
+            if status_callback:
+                status_callback(f"BitsAndBytes library not found. Loading without quantization.")
+
+        # Set a load time limit (depends on model size)
+        if size_category == "small":
+            load_timeout = 180  # 3 minutes
+        elif size_category == "medium":
+            load_timeout = 300  # 5 minutes
+        else:
+            load_timeout = 600  # 10 minutes
+
+        # Record the load start time
+        start_time = time.time()
 
         # If quantization is required and BitsAndBytes is usable
         if config["quantization"] and has_bitsandbytes:
@@ -184,6 +208,9 @@ def load_model(model_names):
                 bnb_4bit_compute_dtype=DTYPE
             )
 
+            if status_callback:
+                status_callback(f"Loading model '{model_name}'... (applying quantization)")
+
             model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 device_map="auto",
@@ -204,6 +231,9 @@ def load_model(model_names):
             )
         else:
             # Load without quantization
+            if status_callback:
+                status_callback(f"Loading model '{model_name}'... (standard mode)")
+
             pipe = pipeline(
                 "text-generation",
                 model=model_name,
@@ -212,10 +242,19 @@ def load_model(model_names):
                 **common_params
             )
 
+        # Check whether the time limit was exceeded
+        elapsed_time = time.time() - start_time
+        if elapsed_time > load_timeout:
+            clear_gpu_memory()
+            loading_in_progress = False
+            return f"Model load timed out: {load_timeout} seconds elapsed. Please try again."
+
         current_model_name = model_name
-        return f"Model '{model_name}' loaded successfully. (optimization: {size_category})"
+        loading_in_progress = False
+        return f"Model '{model_name}' loaded successfully. (optimization: {size_category}, elapsed: {elapsed_time:.1f}s)"
 
     except Exception as e:
+        loading_in_progress = False
         return f"Model load failed: {str(e)}"
 
 @spaces.GPU
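Note: the quantized branch above builds a BitsAndBytesConfig and hands it to AutoModelForCausalLM.from_pretrained, but the full call is split across several hunks. For reference, a minimal self-contained sketch of that loading path, assuming transformers, accelerate, and bitsandbytes are installed; the checkpoint name and NF4 options below are illustrative, not values taken from app.py:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    bnb = BitsAndBytesConfig(
        load_in_4bit=True,                      # store weights in 4-bit
        bnb_4bit_quant_type="nf4",              # NF4 quantization
        bnb_4bit_compute_dtype=torch.bfloat16,  # run matmuls in bf16, matching DTYPE above
    )
    name = "mistralai/Mistral-7B-Instruct-v0.2"  # illustrative checkpoint
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(
        name, device_map="auto", quantization_config=bnb
    )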
@@ -272,8 +311,6 @@ def bot(
     messages = rebuild_messages(history)
 
     # Timeout setup
-    import signal
-
     class TimeoutError(Exception):
         pass
 
@@ -348,7 +385,6 @@ def bot(
                 continue
 
             # Wait up to 30 seconds, then move on to the next step
-            import time
             join_start_time = time.time()
             while t.is_alive() and (time.time() - join_start_time) < 30:
                 t.join(1)  # check once per second
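Note: the loop above replaces an unbounded t.join() with a polled join, so generation can never stall this step for more than 30 seconds. The same pattern in isolation (worker is a hypothetical stand-in for the generation thread):

    import threading
    import time

    def worker():
        time.sleep(5)  # stand-in for slow generation work

    t = threading.Thread(target=worker)
    t.start()
    deadline = time.time() + 30
    while t.is_alive() and time.time() < deadline:
        t.join(1)  # wake once per second instead of blocking indefinitely
    if t.is_alive():
        print("still running; giving up the wait")  # the thread is abandoned, not killed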
@@ -390,6 +426,35 @@ def get_gpu_info():
 
     return "\n".join(gpu_info)
 
+# Automatic model load function (with status updates)
+def auto_load_model():
+    # Auto-load the first model
+    model_key = DEFAULT_MODEL_KEY
+    try:
+        # Return a placeholder message so progress is visible
+        return "Auto-loading the startup model... please wait."
+    except Exception as e:
+        return f"Automatic model load failed: {str(e)}"
+
+# Actual model load function (asynchronous)
+def load_model_async(model_status):
+    # Load the model asynchronously (the actual load runs in the background)
+    model_key = DEFAULT_MODEL_KEY
+
+    def update_status(status):
+        model_status.update(value=status)
+
+    # Load in a separate thread
+    def load_in_thread():
+        try:
+            result = load_model([model_key], update_status)
+            model_status.update(value=result)
+        except Exception as e:
+            model_status.update(value=f"Model load failed: {str(e)}")
+
+    threading.Thread(target=load_in_thread, daemon=True).start()
+    return "Preparing to load the model... it will proceed automatically."
+
 # Gradio interface
 with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
     # Add the title and description at the top
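Note: load_model_async pushes the blocking load into a daemon thread so the page can render before the model is ready. A stripped-down sketch of that hand-off, with slow_load and state as illustrative names; whether calling a Gradio component's update() from a plain thread actually refreshes the UI depends on the Gradio version, so treat that part of the diff as best-effort:

    import threading
    import time

    state = {"status": "loading"}

    def slow_load():
        time.sleep(3)  # stand-in for the real model load
        state["status"] = "ready"

    threading.Thread(target=slow_load, daemon=True).start()
    print(state["status"])  # prints "loading"; the daemon finishes in the background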
@@ -423,7 +488,7 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
         gr.Markdown("""## Model selection""")
         model_selector = gr.Radio(
             choices=list(available_models.values()),
-            value=DEFAULT_MODEL_VALUE,
+            value=DEFAULT_MODEL_VALUE,
             label="Select the LLM model to use",
         )
 
@@ -439,7 +504,7 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
         num_tokens = gr.Slider(
             50,
             2000,
-            1000,
+            1000,
             step=50,
             label="Max tokens per reasoning step",
             interactive=True,
@@ -447,7 +512,7 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
         final_num_tokens = gr.Slider(
             50,
             3000,
-            1500,
+            1500,
             step=50,
             label="Max tokens for the final answer",
             interactive=True,
@@ -455,19 +520,12 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service") as demo:
         do_sample = gr.Checkbox(True, label="Use sampling")
         temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")
 
-    # Automatic model load function
-    def auto_load_model():
-        # Auto-load the first model
-        model_key = DEFAULT_MODEL_KEY
-        try:
-            result = load_model([model_key])
-            return result
-        except Exception as e:
-            return f"Automatic model load failed: {str(e)}"
-
-    # Automatically load the model at startup (when the Space starts)
+    # Initialize automatically at startup
     demo.load(auto_load_model, [], [model_status])
 
+    # Load the model fully asynchronously (avoids delaying the initial page render)
+    demo.load(lambda x: load_model_async(x), [model_status], [], _js="() => {}")
+
     # Connect the load event for the selected model
     def get_model_names(selected_model):
         # Convert the display name back to the actual model name
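Note: both demo.load calls above run once per browser page load. A minimal sketch of that hook, with an illustrative component and label; the _js keyword argument used in the diff is Gradio 3.x spelling and was renamed to js in Gradio 4:

    import gradio as gr

    with gr.Blocks() as demo:
        status = gr.Markdown("starting...")
        # runs when the page loads; the return value replaces `status`
        demo.load(lambda: "ready", inputs=None, outputs=[status])

    demo.launch()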