Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -234,43 +234,55 @@ def bot(
         history.append(
             gr.ChatMessage(
                 role="assistant",
-                content="The model has not been loaded. Please select at least one model
+                content="The model has not been loaded. Please select at least one model and click the 'Load Model' button.",
             )
         )
         yield history
         return

+    try:
+        # Automatically adjust token lengths (based on model size)
+        size_category = get_model_size_category(current_model_name)
+
+        # For large models, reduce the token counts to improve memory efficiency
+        if size_category == "large":
+            max_num_tokens = min(max_num_tokens, 1000)
+            final_num_tokens = min(final_num_tokens, 1500)
+
+        # Lets the tokens be pulled as a stream from the generation thread later
+        streamer = transformers.TextIteratorStreamer(
+            pipe.tokenizer,
+            skip_special_tokens=True,
+            skip_prompt=True,
+        )
+
+        # So the question can be re-inserted into the reasoning when needed
+        question = history[-1]["content"]
+
+        # Prepare the assistant message
+        history.append(
+            gr.ChatMessage(
+                role="assistant",
+                content=str(""),
+                metadata={"title": "🧠 Thinking...", "status": "pending"},
+            )
+        )
+
+        # The reasoning process that will be shown in the current chat
+        messages = rebuild_messages(history)
+
+        # Timeout setup
+        import signal
+
+        class TimeoutError(Exception):
+            pass
+
+        def timeout_handler(signum, frame):
+            raise TimeoutError("Request processing timed out.")
+
+        # Allow at most 120 seconds per reasoning step
+        timeout_seconds = 120
+
         for i, prepend in enumerate(rethink_prepends):
             if i > 0:
                 messages[-1]["content"] += "\n\n"
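Note on the streaming setup added above: transformers.TextIteratorStreamer is meant to be handed to generate() running in a separate thread, while the caller iterates the streamer for decoded text. A minimal standalone sketch of that pattern (the model name and prompt below are placeholders, not taken from this Space):

import threading
import transformers

# Placeholder model, used only to illustrate the streaming pattern.
tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")
model = transformers.AutoModelForCausalLM.from_pretrained("gpt2")

# skip_prompt=True drops the prompt tokens, skip_special_tokens=True drops e.g. EOS.
streamer = transformers.TextIteratorStreamer(
    tokenizer, skip_special_tokens=True, skip_prompt=True
)

inputs = tokenizer("Explain step by step why the sky is blue.", return_tensors="pt")

# generate() blocks, so it runs in a worker thread; the main thread consumes tokens.
t = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=64, streamer=streamer),
)
t.start()
for text in streamer:
    print(text, end="", flush=True)
t.join()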
@@ -294,6 +306,7 @@ def bot(
                     use_cache=True,  # use the KV cache
                 ),
             )
+            t.daemon = True  # daemon thread, so it is torn down together with the main thread
             t.start()

             # Rebuild the history with the new content
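The t.daemon = True line added here only changes shutdown behaviour: a daemon thread cannot keep the Python process alive, so a generation thread that never finishes will not block the Space from exiting. A tiny sketch of the difference, independent of the app:

import threading
import time

def worker():
    time.sleep(60)  # stand-in for a long-running generate() call

t = threading.Thread(target=worker)
t.daemon = True  # must be set before start(); daemon threads die with the main thread
t.start()

print("main thread is done")  # the process exits here instead of waiting 60 seconds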
@@ -302,14 +315,48 @@ def bot(
             history[-1].metadata = {"title": "💭 Thought process", "status": "done"}
             # Thinking is done; this is now the answer (no metadata for intermediate steps)
             history.append(gr.ChatMessage(role="assistant", content=""))

-            for token in streamer:
-                history[-1].content += token
-                history[-1].content = reformat_math(history[-1].content)
+            # Timeout setup (only works on Unix systems)
+            try:
+                if hasattr(signal, 'SIGALRM'):
+                    signal.signal(signal.SIGALRM, timeout_handler)
+                    signal.alarm(timeout_seconds)
+
+                # Token streaming
+                token_count = 0
+                for token in streamer:
+                    history[-1].content += token
+                    history[-1].content = reformat_math(history[-1].content)
+                    token_count += 1
+
+                    # Yield every 10 tokens (better UI responsiveness)
+                    if token_count % 10 == 0:
+                        yield history
+
+                # Yield whatever content is left
                 yield history
+
+                # Clear the timeout
+                if hasattr(signal, 'SIGALRM'):
+                    signal.alarm(0)
+
+            except TimeoutError:
+                if hasattr(signal, 'SIGALRM'):
+                    signal.alarm(0)
+                history[-1].content += "\n\n⚠️ Response generation timed out. Moving on to the next step."
+                yield history
+                continue
+
+            # Wait at most 30 seconds, then move on to the next step
+            import time
+            join_start_time = time.time()
+            while t.is_alive() and (time.time() - join_start_time) < 30:
+                t.join(1)  # check once per second
+
+            # If the thread is still running, move on anyway
+            if t.is_alive():
+                history[-1].content += "\n\n⚠️ Response generation is taking longer than expected. Moving on to the next step."
+                yield history

             # For large models, partially free memory after each step
             if size_category == "large" and torch.cuda.is_available():
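The per-step timeout above relies on signal.SIGALRM, which exists only on Unix (hence the hasattr guard) and only interrupts the main thread. A minimal sketch of that pattern on its own, with a shorter timeout for illustration:

import signal
import time

class StepTimeout(Exception):
    pass

def on_alarm(signum, frame):
    raise StepTimeout("step timed out")

if hasattr(signal, "SIGALRM"):  # not available on Windows
    signal.signal(signal.SIGALRM, on_alarm)
    signal.alarm(2)  # deliver SIGALRM to the main thread after 2 seconds
    try:
        time.sleep(10)  # stand-in for a slow streaming loop
    except StepTimeout:
        print("timed out, moving on")
    finally:
        signal.alarm(0)  # always cancel any pending alarm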
@@ -317,9 +364,15 @@ def bot(

     except Exception as e:
         # Notify the user when an error occurs
+        import traceback
+        error_msg = f"\n\n⚠️ An error occurred during processing: {str(e)}\n{traceback.format_exc()}"
+
+        if len(history) > 0 and isinstance(history[-1], gr.ChatMessage) and history[-1].role == "assistant":
+            history[-1].content += error_msg
+        else:
+            history.append(gr.ChatMessage(role="assistant", content=error_msg))
+
+        yield history

     yield history

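The error path appends traceback.format_exc() to the last assistant message so the full stack trace shows up in the chat rather than only in the server log. The same idea in isolation:

import traceback

messages = []
try:
    raise ValueError("model not loaded")
except Exception as e:
    # format_exc() returns the traceback of the exception currently being handled
    messages.append(f"⚠️ An error occurred: {e}\n{traceback.format_exc()}")

print(messages[-1])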
@@ -402,6 +455,19 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Service"
             do_sample = gr.Checkbox(True, label="Use sampling")
             temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")

+    # Add automatic model loading
+    def auto_load_model():
+        # Automatically load the first model
+        model_key = DEFAULT_MODEL_KEY
+        try:
+            result = load_model([model_key])
+            return result
+        except Exception as e:
+            return f"Automatic model load failed: {str(e)}"
+
+    # Load a model automatically at startup (when the Space starts)
+    demo.load(auto_load_model, [], [model_status])
+
     # Wire up the load event for the selected model
     def get_model_names(selected_model):
         # Convert the display name back to the original model name
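demo.load(...) registers a callback that Gradio runs on every page load, which is what lets the Space load a default model without waiting for a button click. A minimal sketch with a dummy loader (the component and constant names are placeholders, not necessarily those used in app.py):

import gradio as gr

DEFAULT_MODEL_KEY = "small-model"  # placeholder

def auto_load_model():
    # Stand-in for the real load_model([DEFAULT_MODEL_KEY]) call.
    return f"Loaded {DEFAULT_MODEL_KEY}"

with gr.Blocks() as demo:
    model_status = gr.Textbox(label="Model status")
    # Runs once per page load; no inputs, output goes to model_status.
    demo.load(auto_load_model, [], [model_status])

demo.launch()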