Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,7 +26,7 @@ def finish_chat():
|
|
26 |
# 建立推論函式:使用 streamer 並回傳 generator 結果
|
27 |
def generate_stream(prompt):
|
28 |
q = queue.Queue()
|
29 |
-
|
30 |
def streamer(subword):
|
31 |
print(subword, end='', flush=True)
|
32 |
q.put(subword)
|
@@ -34,6 +34,7 @@ def generate_stream(prompt):
|
|
34 |
|
35 |
def worker():
|
36 |
# 在背景 thread 中做推論
|
|
|
37 |
gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
|
38 |
tps = gen_result.perf_metrics.get_throughput().mean
|
39 |
tps_result = f"{tps:.2f} tokens/s"
|
|
|
26 |
# 建立推論函式:使用 streamer 並回傳 generator 結果
|
27 |
def generate_stream(prompt):
|
28 |
q = queue.Queue()
|
29 |
+
tps_result = ""
|
30 |
def streamer(subword):
|
31 |
print(subword, end='', flush=True)
|
32 |
q.put(subword)
|
|
|
34 |
|
35 |
def worker():
|
36 |
# 在背景 thread 中做推論
|
37 |
+
nonlocal tps_result
|
38 |
gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
|
39 |
tps = gen_result.perf_metrics.get_throughput().mean
|
40 |
tps_result = f"{tps:.2f} tokens/s"
|