|
import gradio as gr |
|
from onnxruntime_genai import ChatSession, GenerationConfig, ORTModel |
|
|
|
|
|
# Relative path of the directory that is handed to ORTModel when the model is
# loaded. The directory name suggests a CPU int4 (RTN) export — confirm against
# the files actually shipped with the app.
MODEL_DIR = "model/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
|
|
|
|
|
# Load the model once, at import time, on the CPU execution provider and wrap
# it in a single chat session. `session` is a module-level object, so every
# request served by this process goes through the same instance.
# NOTE(review): ORTModel / ChatSession / GenerationConfig do not appear in the
# documented onnxruntime-genai Python API (which exposes Model, Tokenizer,
# GeneratorParams and Generator) — confirm the installed package provides them.
model = ORTModel(MODEL_DIR, execution_provider="cpu")
session = ChatSession(model)
|
|
|
|
|
def chat_with_phi4(message, history):
    """Generate a reply to ``message`` and return the updated conversation.

    Args:
        message: Text entered by the user in the textbox.
        history: Earlier ``(user, assistant)`` turns held in the Gradio state,
            or ``None``/empty at the start of a conversation.

    Returns:
        A pair ``(history, history)`` containing the same updated list twice:
        once for the chatbot display and once for the stored state.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    # Nothing to send: leave the conversation untouched instead of asking the
    # model to answer an empty prompt.
    if not message or not message.strip():
        return history, history

    # Make the session's context mirror the UI state exactly. Appending the
    # whole history on every call (as this function used to) duplicated all
    # earlier turns in the long-lived global session each time a message was
    # sent. Replacing the contents also means a cleared UI state starts the
    # model from a clean context.
    # Assumes session.history is a plain mutable list (the previous code
    # already called .append on it) — TODO confirm.
    session.history[:] = [(turn[0], turn[1]) for turn in history]

    reply = session.chat(message, config=GenerationConfig(max_new_tokens=300))

    history.append((message, reply))
    return history, history
|
|
|
|
|
# URL of the logo image shown at the top of the page.
HF_LOGO = "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
|
|
|
|
|
# Build the page: logo and title, the chat display, the input box, and the
# Send / Clear buttons.
with gr.Blocks() as demo:
    gr.Image(value=HF_LOGO, width=120, show_label=False, show_download_button=False)
    gr.Markdown("### Chat with Microsoft Phi-4 Mini Instruct (ONNX)")

    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Your message")
    # Conversation so far; passed into and returned from chat_with_phi4.
    state = gr.State([])

    send_btn = gr.Button("Send")
    clear_btn = gr.Button("Clear")

    # Clicking Send and pressing Enter in the textbox run the same handler.
    send_btn.click(chat_with_phi4, [user_input, state], [chatbot, state])
    user_input.submit(chat_with_phi4, [user_input, state], [chatbot, state])

    # Clear empties both the chat display and the stored state.
    # NOTE(review): this handler does not touch the module-level `session`.
    clear_btn.click(lambda: ([], []), outputs=[chatbot, state])

# Start the Gradio server as soon as the module is executed.
demo.launch()