# qwen3_test / app.py
import atexit
import queue
import threading

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
# Download the INT4 OpenVINO model from the Hugging Face Hub.
# (local_dir_use_symlinks is deprecated in recent huggingface_hub releases,
# so it is no longer passed here.)
hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe.start_chat()  # start a chat session; the pipeline keeps the conversation state internally
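
# For reference, a one-shot (non-streaming) call looks like the line below;
# keyword arguments such as max_new_tokens are forwarded to the generation
# config. Kept commented out so it does not run as part of the app -- a
# minimal sketch, not part of the original flow:
#
#     print(pipe.generate("Hello", max_new_tokens=16))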
def generate(prompt, history):
    """
    Send the prompt to the LLM and stream the reply back via `yield`.

    Gradio passes `history` in, but because `pipe.start_chat()` keeps the
    conversation state inside the pipeline itself, past turns are not re-sent
    here. More elaborate prompt handling (e.g. a system prompt, or rebuilding
    the full prompt from `history`) could be added in this function.
    """
    tokens = queue.Queue()

    def streamer(subword):
        # Called by openvino_genai once per generated subword. A streamer
        # callback must not `yield` (that would turn it into a generator that
        # never executes), so each subword is handed to the consumer loop
        # below through a queue instead.
        tokens.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
        tokens.put(None)  # sentinel: generation finished

    threading.Thread(target=worker, daemon=True).start()

    full_response = ""
    while True:
        subword = tokens.get()
        if subword is None:
            break
        full_response += subword
        yield full_response  # stream the growing response to Gradio
    # After generation ends, logic such as logging the conversation or
    # updating state could be added here.
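
# Returning ov_genai.StreamingStatus.STOP from the streamer callback aborts
# generation early. A hedged sketch of a stop-on-marker variant (the "###"
# marker is purely hypothetical):
#
#     def streamer(subword):
#         tokens.put(subword)
#         if "###" in subword:
#             return ov_genai.StreamingStatus.STOP
#         return ov_genai.StreamingStatus.RUNNING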
def on_close():
    pipe.finish_chat()  # release the chat state when the app shuts down
    print("Chat finished and pipeline closed.")
if __name__ == "__main__":
    demo = gr.ChatInterface(
        generate,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7),
        title="LLM Streaming Output Example (OpenVINO)",
        description="This example shows how to stream OpenVINO GenAI responses through Gradio.",
        theme="soft",
        examples=["Hello", "Please introduce yourself", "What's the weather like today?"],
    )
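    # gr.ChatInterface streams generator output over Gradio's request queue.
    # Recent Gradio releases enable the queue by default; on older versions it
    # may need to be switched on explicitly before launching (an assumption
    # about the installed Gradio version):
    #
    #     demo.queue()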
    # gr.Blocks.close() stops a running server rather than registering a
    # shutdown hook, so register the cleanup with atexit instead.
    atexit.register(on_close)
    demo.launch()