Spaces:
Running
Running
File size: 2,199 Bytes
07268e3 1dd12ee 07268e3 4b02b47 759aea4 77e29eb 51f6523 7dc6c22 d32e032 759aea4 4b02b47 a1d9077 4b02b47 759aea4 1dd12ee 759aea4 1dd12ee 759aea4 d32e032 1dd12ee a1d9077 1dd12ee 60685d1 d32e032 759aea4 da55442 d32e032 759aea4 d32e032 759aea4 d32e032 759aea4 a1d9077 759aea4 d32e032 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import huggingface_hub as hf_hub
import time
import openvino_genai as ov_genai
import numpy as np
import gradio as gr
import re
# Download the models
model_ids = [
    "OpenVINO/Qwen3-0.6B-int4-ov",
    "OpenVINO/Qwen3-1.7B-int4-ov",
    # "OpenVINO/Qwen3-4B-int4-ov",  # not available
    "OpenVINO/Qwen3-8B-int4-ov",
    "OpenVINO/Qwen3-14B-int4-ov",
]

# Map each short model name (the text after the last "/") to its full repository id.
model_name_to_full_id = {}
for model_id in model_ids:
    model_name_to_full_id[model_id.split("/")[-1]] = model_id


def _download_snapshot(model_id, model_path):
    """Download one model repository into the local directory ``model_path``.

    The outcome is printed to stdout. Failures are reported rather than
    raised, so the caller's loop carries on with the remaining models.
    """
    try:
        hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
        print(f"Successfully downloaded {model_id} to {model_path}")
    except Exception as e:
        print(f"Error downloading {model_id}: {e}")


for model_id in model_ids:
    # The local directory is named after the model (last path component of the id).
    model_path = model_id.split("/")[-1]
    _download_snapshot(model_id, model_path)

# Inference settings: target device and the model preselected by default.
device = "CPU"
default_model_name = "Qwen3-0.6B-int4-ov"
def generate_response(prompt, model_name):
    """Generate a reply to ``prompt`` with the selected model.

    The pipeline is built from the local directory named ``model_name`` and
    kept in module globals, so it is only rebuilt when a different model is
    requested than the one used by the previous successful load.

    Args:
        prompt: Text sent to the model as a single-item batch.
        model_name: Name of the local model directory to load.

    Returns:
        A 2-tuple ``(tokens_per_sec, response)``. ``tokens_per_sec`` is the
        mean throughput formatted with two decimals and ``response`` is the
        generated text. If loading or generation fails, an error label and an
        error message are returned in those two positions instead.
    """
    global pipe, tokenizer, _loaded_model_name  # Pipeline state shared across calls
    try:
        # Rebuilding the pipeline is expensive, so only do it when the
        # requested model differs from the one currently loaded.
        loaded_name = globals().get("_loaded_model_name")
        if loaded_name is None or loaded_name != model_name:
            print(f"Switching to model: {model_name}")
            new_pipe = ov_genai.LLMPipeline(model_name, device)
            new_tokenizer = new_pipe.get_tokenizer()
            new_tokenizer.set_chat_template(new_tokenizer.chat_template)
            # Commit only after every step succeeded, so a failed load never
            # leaves a partially initialised pipeline registered.
            pipe, tokenizer, _loaded_model_name = new_pipe, new_tokenizer, model_name

        generated = pipe.generate([prompt], max_length=1024)
        tokenpersec = f"{generated.perf_metrics.get_throughput().mean:.2f}"
        # Hand the UI plain text rather than the raw result object.
        return tokenpersec, str(generated)
    except Exception as e:
        # Return exactly two values: the interface declares two outputs.
        return "發生錯誤", f"生成回應時發生錯誤:{e}"
# Build the Gradio interface
model_choices = list(model_name_to_full_id)

# Input components: free-form prompt plus a model selector.
prompt_input = gr.Textbox(lines=5, label="輸入提示 (Prompt)")
model_selector = gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型")

# Output components: throughput figure and the generated reply.
throughput_output = gr.Textbox(label="tokens/sec")
response_output = gr.Textbox(label="回應")

demo = gr.Interface(
    fn=generate_response,
    inputs=[prompt_input, model_selector],
    outputs=[throughput_output, response_output],
    title="Qwen3 Model Inference",
    description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。",
)

# Start the web UI only when executed as a script.
if __name__ == "__main__":
    demo.launch()
|