hsuwill000 committed on
Commit 7d7759a · verified · 1 Parent(s): 3592035

Update app.py

Files changed (1)
  1. app.py +20 -34
app.py CHANGED
@@ -18,43 +18,29 @@ tokenizer = pipe.get_tokenizer()
  tokenizer.set_chat_template(tokenizer.chat_template)


- def generate_response(prompt, history=[]):
-     full_response = ""
-     token_count = 0
-     start_time = time.time()
-
-     def streamer(subword):
-         nonlocal full_response, token_count
-         full_response += subword
-         token_count += 1
-         return ov_genai.StreamingStatus.RUNNING
-
-     pipe.start_chat(history)  # Pass the history to start_chat
-     pipe.generate(prompt, streamer=streamer, max_new_tokens=1024)  # Adjust max_new_tokens as needed
-     pipe.finish_chat()  # Finish the chat
-
-     end_time = time.time()
-     elapsed_time = end_time - start_time
-     tokenpersec = f'{token_count / elapsed_time:.2f}' if elapsed_time > 0 else "0.00"
-
-     history.append((prompt, full_response))  # Store the conversation history
-     return tokenpersec, history
+ def generate_response(prompt):
+     try:
+         generated = pipe.generate([prompt], max_length=1024)
+         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+         return tokenpersec, generated
+     except Exception as e:
+         return "An error occurred", f"An error occurred while generating the response: {e}"

  # Build the Gradio interface
- with gr.Blocks() as demo:
-     chatbot = gr.Chatbot()
-     state = gr.State([])
-     msg = gr.Textbox(label="Prompt")
-
-     def respond(message, chat_history):
-         tokenpersec, chat_history = generate_response(message, chat_history)
-         response = chat_history[-1][1]
-         return "", chat_history, tokenpersec
-
-     msg.submit(respond, [msg, state], [msg, chatbot, gr.Textbox(label="tokens/sec")])
-
-     demo.title = "Qwen3-0.6B-int4-ov Streaming Demo"
-     demo.description = "Inference app based on Qwen3-0.6B-int4-ov, with streaming output and a GUI."
+ demo = gr.Interface(
+     fn=generate_response,
+     inputs=gr.Textbox(lines=5, label="Prompt"),
+     outputs=[
+         gr.Textbox(label="tokens/sec"),
+         # gr.Textbox(label="Thinking process"),
+         # gr.Textbox(label="Final response"),
+         gr.Textbox(label="Response")
+     ],
+     title="Qwen3-0.6B-int4-ov",
+     description="Inference app based on Qwen3-0.6B-int4-ov, with thinking/response separation and a GUI."
+ )

  if __name__ == "__main__":
      demo.launch()
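
For context on the change: the commit drops the streaming gr.Blocks chat UI and instead reports the mean generation throughput that OpenVINO GenAI collects in perf_metrics. A minimal sketch of that new path, assuming the pipeline is created the same way as earlier in app.py (the model directory and device below are illustrative assumptions, not taken from the diff):

import openvino_genai as ov_genai

# Assumed setup; app.py builds `pipe` earlier with its own model path and device.
pipe = ov_genai.LLMPipeline("Qwen3-0.6B-int4-ov", "CPU")

# generate() returns a results object whose perf_metrics carries timing data;
# get_throughput().mean is the average tokens/sec, as read in the committed code.
result = pipe.generate(["Hello, who are you?"], max_length=1024)
print(f"{result.perf_metrics.get_throughput().mean:.2f} tokens/sec")
print(result)  # the app passes this object straight to a gr.Textbox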