qwen3_test / app.py
hsuwill000's picture
Update app.py
7d7759a verified
raw
history blame
1.31 kB
import huggingface_hub as hf_hub
import time
import openvino_genai as ov_genai
import numpy as np
import gradio as gr
import re
# ไธ‹่ผ‰ๆจกๅž‹
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
# ๅปบ็ซ‹ๆŽจ็†็ฎก็ทš
device = "CPU"
pipe = ov_genai.LLMPipeline(model_path, device)
tokenizer = pipe.get_tokenizer()
tokenizer.set_chat_template(tokenizer.chat_template)
def generate_response(prompt):
    """Generate a reply for *prompt* and report decoding throughput.

    Returns a 2-tuple ``(tokens_per_sec, response)`` matching the two
    Gradio output textboxes declared by the Interface below.
    """
    try:
        # pipe.generate takes a batch (list) of prompts; the result object
        # carries both the decoded text and performance metrics.
        generated = pipe.generate([prompt], max_length=1024)
        tokens_per_sec = f"{generated.perf_metrics.get_throughput().mean:.2f}"
        return tokens_per_sec, generated
    except Exception as e:
        # Bug fix: the original error path returned THREE values while the
        # success path (and the Interface's outputs list) expects exactly two,
        # so any exception produced a broken UI response. Return two values:
        # a placeholder for the throughput box and the error text for the reply box.
        return "發生錯誤", f"生成回應時發生錯誤：{e}"
# ๅปบ็ซ‹ Gradio ไป‹้ข
demo = gr.Interface(
fn=generate_response,
inputs=gr.Textbox(lines=5, label="่ผธๅ…ฅๆ็คบ (Prompt)"),
outputs=[
gr.Textbox(label="tokens/sec"),
#gr.Textbox(label="ๆ€่€ƒ้Ž็จ‹"),
#gr.Textbox(label="ๆœ€็ต‚ๅ›žๆ‡‰")
gr.Textbox(label="ๅ›žๆ‡‰")
],
title="Qwen3-0.6B-int4-ov ",
description="ๅŸบๆ–ผ Qwen3-0.6B-int4-ov ๆŽจ็†ๆ‡‰็”จ๏ผŒๆ”ฏๆดๆ€่€ƒ้Ž็จ‹ๅˆ†้›ข่ˆ‡ GUIใ€‚"
)
if __name__ == "__main__":
demo.launch()