import os
import queue
import threading

import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub
# OpenVINO setup
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Or your chosen model
model_path = "Qwen3-0.6B-int4-ov"  # Local directory for the model

# Download the model if it doesn't exist locally
try:
    # Checking for the directory is a quick-and-dirty existence test; adjust as needed.
    if not os.path.exists(model_path):
        hf_hub.snapshot_download(model_id, local_dir=model_path)
except Exception as e:
    print(f"Error downloading model: {e}")
    print("Please ensure huggingface_hub is installed and that you are authenticated if required.")
    exit()  # Or handle the error more gracefully
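
# Optional (an addition, not part of the original Space): if the model repo is
# gated, huggingface_hub provides a login() helper. This sketch assumes a token
# supplied via an HF_TOKEN environment variable; the variable name is an
# assumption, adjust to your setup.
if os.environ.get("HF_TOKEN"):
    hf_hub.login(token=os.environ["HF_TOKEN"])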
# Build the OpenVINO GenAI pipeline on CPU and keep a chat session open
pipe = ov_genai.LLMPipeline(model_path, "CPU")
tokenizer = pipe.get_tokenizer()
tokenizer.set_chat_template(tokenizer.chat_template)
pipe.start_chat()  # Start the chat session after the pipeline is initialized
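
# Optional sketch (an assumption, not part of the original Space): generation
# parameters can also be grouped in an openvino_genai GenerationConfig object
# and passed to generate(), e.g. pipe.generate(prompt, gen_config, streamer=streamer),
# instead of individual keyword arguments. The values below are illustrative.
gen_config = ov_genai.GenerationConfig()
gen_config.max_new_tokens = 100
gen_config.temperature = 0.7
gen_config.do_sample = True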
# Gradio chatbot UI
def user(user_message, history: list):
    return "", history + [{"role": "user", "content": user_message}]
def bot(history: list):
    # The user's latest message is the last entry in the history at this point
    user_message = history[-1]["content"]

    # Add an empty assistant message that will be filled in as tokens stream back
    history.append({"role": "assistant", "content": ""})

    # pipe.generate() blocks until generation finishes, so run it in a worker
    # thread and push each new subword into a queue that this generator drains.
    token_queue = queue.Queue()

    def streamer(subword):
        token_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING  # Keep generation going

    def worker():
        pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        token_queue.put(None)  # Sentinel: generation is finished

    threading.Thread(target=worker, daemon=True).start()

    # Yield the updated chat history every time a new subword arrives
    while True:
        subword = token_queue.get()
        if subword is None:
            break
        history[-1]["content"] += subword
        yield history

    # Alternatively, without token-by-token updates (this skips the streaming):
    # history[-1]["content"] = pipe.generate(user_message, max_new_tokens=100)
    # yield history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
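
    # Note (an addition, not in the original Space): clearing the UI above does
    # not reset the pipeline's internal chat history, so the model still "remembers"
    # the previous turns. If a full reset is desired, LLMPipeline's
    # finish_chat()/start_chat() can be attached to the same button, for example:
    def reset_chat():
        pipe.finish_chat()
        pipe.start_chat()

    clear.click(reset_chat, None, None, queue=False)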
if __name__ == "__main__":
    demo.queue().launch()