Spaces:

hsuwill000
/

maxtest01

Running

hsuwill000 committed on Jul 1

Commit

766289c

verified ·

1 Parent(s): 085daf3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import time
 import huggingface_hub as hf_hub
 # 初始化 OpenVINO 模型
-model_id = "OpenVINO/Qwen2-0.5B-int4-ov"
 model_path = "ov"
@@ -27,7 +27,7 @@ def finish_chat():
 # 建立推論函式：使用 streamer 並回傳 generator 結果
 def generate_stream(prompt):
-    prompt = prompt #+ " /no_think" + " 答案短且明瞭"
     q = queue.Queue()
     tps_result = ""
@@ -39,7 +39,7 @@ def generate_stream(prompt):
     def worker():
         # 在背景 thread 中做推論
         nonlocal tps_result
-        gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=1024)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # 結束符號

 import huggingface_hub as hf_hub
 # 初始化 OpenVINO 模型
+model_id = "OpenVINO/Qwen3-8B-int4-ov"
 model_path = "ov"
 # 建立推論函式：使用 streamer 並回傳 generator 結果
 def generate_stream(prompt):
+    prompt = prompt + " /no_think" + " 答案短且明瞭"
     q = queue.Queue()
     tps_result = ""
     def worker():
         # 在背景 thread 中做推論
         nonlocal tps_result
+        gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=2048)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # 結束符號