openfree committed
Commit 5969407 · verified · 1 parent: 60f9305

Update app.py

Files changed (1): app.py (+102, -36)
app.py CHANGED
@@ -234,43 +234,55 @@ def bot(
         history.append(
             gr.ChatMessage(
                 role="assistant",
-                content="The model is not loaded. Please select at least one model.",
+                content="The model is not loaded. Please select at least one model and click the 'Load Model' button.",
             )
         )
         yield history
         return
 
-    # Auto-adjust the token budget (based on model size)
-    size_category = get_model_size_category(current_model_name)
-
-    # Reduce the token count for large models to improve memory efficiency
-    if size_category == "large":
-        max_num_tokens = min(max_num_tokens, 1000)
-        final_num_tokens = min(final_num_tokens, 1500)
-
-    # For streaming the tokens from the thread later
-    streamer = transformers.TextIteratorStreamer(
-        pipe.tokenizer,
-        skip_special_tokens=True,
-        skip_prompt=True,
-    )
-
-    # For re-inserting the question into the reasoning when needed
-    question = history[-1]["content"]
-
-    # Prepare the assistant message
-    history.append(
-        gr.ChatMessage(
-            role="assistant",
-            content=str(""),
-            metadata={"title": "🧠 Thinking...", "status": "pending"},
-        )
-    )
-
-    # The reasoning process to display in the current chat
-    messages = rebuild_messages(history)
-
-    try:
+    try:
+        # Auto-adjust the token budget (based on model size)
+        size_category = get_model_size_category(current_model_name)
+
+        # Reduce the token count for large models to improve memory efficiency
+        if size_category == "large":
+            max_num_tokens = min(max_num_tokens, 1000)
+            final_num_tokens = min(final_num_tokens, 1500)
+
+        # For streaming the tokens from the thread later
+        streamer = transformers.TextIteratorStreamer(
+            pipe.tokenizer,
+            skip_special_tokens=True,
+            skip_prompt=True,
+        )
+
+        # For re-inserting the question into the reasoning when needed
+        question = history[-1]["content"]
+
+        # Prepare the assistant message
+        history.append(
+            gr.ChatMessage(
+                role="assistant",
+                content=str(""),
+                metadata={"title": "🧠 Thinking...", "status": "pending"},
+            )
+        )
+
+        # The reasoning process to display in the current chat
+        messages = rebuild_messages(history)
+
+        # Timeout setup
+        import signal
+
+        class TimeoutError(Exception):
+            pass
+
+        def timeout_handler(signum, frame):
+            raise TimeoutError("Request processing timed out.")
+
+        # Allow each step at most 120 seconds
+        timeout_seconds = 120
+
         for i, prepend in enumerate(rethink_prepends):
             if i > 0:
                 messages[-1]["content"] += "\n\n"
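For readers unfamiliar with the streaming setup this hunk keeps: `TextIteratorStreamer` turns a blocking `generate()` call into an iterable by running generation on a worker thread. A minimal, self-contained sketch of that pattern, assuming a standard transformers text-generation pipeline (`pipe` follows the diff; `stream_generate` and `prompt` are illustrative names, not from app.py):

```python
# Minimal sketch of the worker-thread + TextIteratorStreamer pattern.
import threading

import transformers


def stream_generate(pipe, prompt, max_new_tokens=256):
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,
        skip_special_tokens=True,  # hide special tokens such as <eos>
        skip_prompt=True,          # emit only newly generated text
    )
    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
    thread = threading.Thread(
        target=pipe.model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens),
    )
    thread.start()
    for text_chunk in streamer:  # blocks until the worker produces more text
        yield text_chunk
    thread.join()
```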
@@ -294,6 +306,7 @@ def bot(
                     use_cache=True,  # Use the KV cache
                 ),
             )
+            t.daemon = True  # Daemon thread: terminates together with the main thread
             t.start()
 
             # Rebuild the history with the new content
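The one-line change above only marks the generation thread as a daemon. A standalone toy illustration of what that flag changes (not part of app.py): the interpreter does not wait for daemon threads at shutdown, at the cost of skipping any cleanup they would have done.

```python
# Toy illustration of t.daemon = True.
import threading
import time


def worker():
    time.sleep(60)  # stands in for a long model.generate() call


t = threading.Thread(target=worker)
t.daemon = True  # must be set before start(); the default (False) would block exit
t.start()
print("main thread done")  # the process exits here instead of hanging for 60 s
```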
@@ -302,14 +315,48 @@ def bot(
             history[-1].metadata = {"title": "💭 Thinking process", "status": "done"}
             # Thinking is done; now the answer (no metadata for the intermediate steps)
             history.append(gr.ChatMessage(role="assistant", content=""))
+
+            # Set the timeout (only works on Unix systems)
+            try:
+                if hasattr(signal, 'SIGALRM'):
+                    signal.signal(signal.SIGALRM, timeout_handler)
+                    signal.alarm(timeout_seconds)
+
+                # Token streaming
+                token_count = 0
+                for token in streamer:
+                    history[-1].content += token
+                    history[-1].content = reformat_math(history[-1].content)
+                    token_count += 1
+
+                    # Yield every 10 tokens (improves UI responsiveness)
+                    if token_count % 10 == 0:
+                        yield history
 
-            # Token streaming
-            for token in streamer:
-                history[-1].content += token
-                history[-1].content = reformat_math(history[-1].content)
+                # Yield the remaining content
                 yield history
 
-            t.join()
+                # Cancel the timeout
+                if hasattr(signal, 'SIGALRM'):
+                    signal.alarm(0)
+
+            except TimeoutError:
+                if hasattr(signal, 'SIGALRM'):
+                    signal.alarm(0)
+                history[-1].content += "\n\n⚠️ Response generation timed out. Moving on to the next step."
+                yield history
+                continue
+
+            # Wait at most 30 seconds, then move on to the next step
+            import time
+            join_start_time = time.time()
+            while t.is_alive() and (time.time() - join_start_time) < 30:
+                t.join(1)  # Check once per second
+
+            # If the thread is still running, move on anyway
+            if t.is_alive():
+                history[-1].content += "\n\n⚠️ Response generation is taking longer than expected. Moving on to the next step."
+                yield history
 
             # For large models, do a partial memory cleanup after each step
             if size_category == "large" and torch.cuda.is_available():
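A minimal sketch of the `signal.alarm` timeout pattern this hunk wraps around token streaming (`StepTimeout`, `run_with_alarm`, and the usage line are illustrative names, not from app.py). Two caveats are mirrored by the diff's `hasattr()` guard: `SIGALRM` exists only on Unix, and CPython allows installing signal handlers only from the main thread, so the pattern assumes it runs there. The hunk's 30-second bounded `join()` loop then serves as the fallback on platforms where the alarm is unavailable.

```python
# Sketch of a SIGALRM-based per-step timeout.
import signal


class StepTimeout(Exception):
    pass


def _on_alarm(signum, frame):
    raise StepTimeout("step exceeded its time budget")


def run_with_alarm(fn, timeout_seconds):
    if not hasattr(signal, "SIGALRM"):
        return fn()  # e.g. on Windows: run without a timeout
    signal.signal(signal.SIGALRM, _on_alarm)
    signal.alarm(timeout_seconds)  # schedule SIGALRM in timeout_seconds
    try:
        return fn()
    finally:
        signal.alarm(0)  # always cancel a still-pending alarm


# Illustrative use: abort if draining an iterator takes over 120 seconds.
# run_with_alarm(lambda: sum(1 for _ in streamer), 120)
```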
@@ -317,9 +364,15 @@ def bot(
 
         except Exception as e:
             # Notify the user when an error occurs
-            if len(history) > 0 and history[-1].role == "assistant":
-                history[-1].content += f"\n\n⚠️ An error occurred during processing: {str(e)}"
-                yield history
+            import traceback
+            error_msg = f"\n\n⚠️ An error occurred during processing: {str(e)}\n{traceback.format_exc()}"
+
+            if len(history) > 0 and isinstance(history[-1], gr.ChatMessage) and history[-1].role == "assistant":
+                history[-1].content += error_msg
+            else:
+                history.append(gr.ChatMessage(role="assistant", content=error_msg))
+
+            yield history
 
         yield history
 
@@ -402,6 +455,19 @@ with gr.Blocks(fill_height=True, title="ThinkFlow - Step-by-step Reasoning Servi
         do_sample = gr.Checkbox(True, label="Use sampling")
         temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")
 
+    # Add automatic model loading
+    def auto_load_model():
+        # Auto-load the first model
+        model_key = DEFAULT_MODEL_KEY
+        try:
+            result = load_model([model_key])
+            return result
+        except Exception as e:
+            return f"Automatic model load failed: {str(e)}"
+
+    # Load the model automatically at startup (when the Space starts)
+    demo.load(auto_load_model, [], [model_status])
+
     # Wire up the load event for the selected model
     def get_model_names(selected_model):
         # Convert the display name back to the original model name
 