Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import huggingface_hub as hf_hub
|
3 |
import time
|
4 |
import openvino_genai as ov_genai
|
@@ -20,14 +19,22 @@ tokenizer.set_chat_template(tokenizer.chat_template)
|
|
20 |
|
21 |
|
22 |
def generate_response(prompt):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
try:
|
24 |
-
|
25 |
-
|
|
|
26 |
|
27 |
-
return tokenpersec,
|
28 |
except Exception as e:
|
29 |
return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
|
30 |
-
|
31 |
|
32 |
# 建立 Gradio 介面
|
33 |
demo = gr.Interface(
|
|
|
|
|
1 |
import huggingface_hub as hf_hub
|
2 |
import time
|
3 |
import openvino_genai as ov_genai
|
|
|
19 |
|
20 |
|
21 |
def generate_response(prompt):
    """Generate a reply to *prompt* with the module-level ``pipe``.

    Text is collected chunk by chunk through a streamer callback while
    ``pipe.generate`` runs (capped at 100 new tokens).

    Args:
        prompt: The prompt passed straight to ``pipe.generate``.

    Returns:
        On success, a 2-tuple ``(tokenpersec, full_response)`` where
        ``tokenpersec`` is the mean throughput formatted with two decimals
        and ``full_response`` is the concatenation of all streamed chunks.
        On failure, a 3-tuple of error strings (see the review note in the
        ``except`` branch).
    """
    chunks = []  # streamed sub-words, joined once at the end

    def streamer(subword):
        # Collect each chunk as it arrives.
        chunks.append(subword)
        # The return value is a stop flag for the pipeline, not a payload:
        # a falsy value lets generation continue. The original returned the
        # accumulated text here, which is not a valid "keep going" signal.
        return False

    try:
        # Streamed generation; the callback above fills ``chunks``.
        generated = pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
        # Throughput is read from the perf metrics attached to the result.
        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'

        return tokenpersec, "".join(chunks)
    except Exception as e:
        # NOTE(review): this branch returns three values while the success
        # path returns two; the Gradio output list is not visible from here,
        # so the arity is left unchanged -- confirm against gr.Interface.
        return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
|
37 |
+
|
38 |
|
39 |
# 建立 Gradio 介面
|
40 |
demo = gr.Interface(
|