Spaces:
Running
Running
Update app.py: move the " /no_think" prompt suffix out of worker() and into the top of generate_stream()
Browse files
app.py
CHANGED
@@ -30,6 +30,7 @@ def finish_chat():
|
|
30 |
|
31 |
# 建立推論函式:使用 streamer 並回傳 generator 結果
|
32 |
def generate_stream(prompt):
|
|
|
33 |
q = queue.Queue()
|
34 |
tps_result = ""
|
35 |
def streamer(subword):
|
@@ -40,7 +41,6 @@ def generate_stream(prompt):
|
|
40 |
def worker():
|
41 |
# 在背景 thread 中做推論
|
42 |
nonlocal tps_result
|
43 |
-
prompt = prompt + "/no_think"
|
44 |
gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=32767)
|
45 |
tps = gen_result.perf_metrics.get_throughput().mean
|
46 |
tps_result = f"{tps:.2f} tokens/s"
|
|
|
30 |
|
31 |
# 建立推論函式:使用 streamer 並回傳 generator 結果
|
32 |
def generate_stream(prompt):
|
33 |
+
prompt = prompt + " /no_think"
|
34 |
q = queue.Queue()
|
35 |
tps_result = ""
|
36 |
def streamer(subword):
|
|
|
41 |
def worker():
|
42 |
# 在背景 thread 中做推論
|
43 |
nonlocal tps_result
|
|
|
44 |
gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=32767)
|
45 |
tps = gen_result.perf_metrics.get_throughput().mean
|
46 |
tps_result = f"{tps:.2f} tokens/s"
|