File size: 1,315 Bytes
45ca121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
from onnxruntime_genai import ChatSession, GenerationConfig, ORTModel

# Directory holding the downloaded ONNX model files, relative to the working
# directory the script is started from.
MODEL_DIR = "model/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"

# Load the model once at import time, requesting the CPU execution provider,
# and wrap it in a single module-level chat session that chat_with_phi4 reuses.
# NOTE(review): this one session object is shared by every caller of this
# module — confirm that is acceptable if several users chat concurrently.
# NOTE(review): ORTModel / ChatSession are taken from onnxruntime_genai by the
# import above; verify these names exist in the installed version.
model = ORTModel(MODEL_DIR, execution_provider="cpu")
session = ChatSession(model)

# Chat function
def chat_with_phi4(message, history):
    """Send one user message to the shared chat session and record the reply.

    Args:
        message: Text typed by the user.
        history: Prior ``(user_message, reply)`` pairs for this conversation,
            or ``None``/empty for a fresh conversation.

    Returns:
        A 2-tuple ``(history, history)`` holding the updated list of pairs
        twice: once for the chatbot display and once for the stored state.
        A blank message returns the existing history unchanged without
        calling the model.
    """
    # Work on a copy so the caller's list is never mutated in place.
    turns = list(history or [])

    # Nothing to ask: skip generation instead of sending an empty prompt.
    if not message or not message.strip():
        return turns, turns

    # Rebuild the session's context from the caller's history instead of
    # appending to whatever is already there. Appending on every call re-added
    # all earlier turns each time, so the context filled up with duplicates
    # and a cleared conversation still carried its old turns.
    # Assumes session.history is a mutable list-like object (the previous code
    # already relied on .append) — TODO confirm it supports .clear().
    session.history.clear()
    for turn in turns:
        session.history.append((turn[0], turn[1]))

    reply = session.chat(message, config=GenerationConfig(max_new_tokens=300))
    turns.append((message, reply))
    return turns, turns

# Hugging Face logo shown above the chat title
HF_LOGO = "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"

# Gradio interface: logo and title, the chat transcript, a message box, and
# Send / Clear buttons. The conversation so far is kept in a gr.State list.
with gr.Blocks() as demo:
    gr.Image(value=HF_LOGO, width=120, show_label=False, show_download_button=False)
    gr.Markdown("### Chat with Microsoft Phi-4 Mini Instruct (ONNX)")

    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Your message")
    state = gr.State([])

    send_btn = gr.Button("Send")
    clear_btn = gr.Button("Clear")

    # The Send button and pressing Enter in the textbox do the same thing:
    # run the chat function, then empty the textbox so the message that was
    # just sent is not left behind to be resubmitted by accident.
    for send_event in (send_btn.click, user_input.submit):
        send_event(
            chat_with_phi4,
            inputs=[user_input, state],
            outputs=[chatbot, state],
        ).then(lambda: "", outputs=user_input)

    # Clear resets both the visible transcript and the stored history.
    clear_btn.click(lambda: ([], []), outputs=[chatbot, state])

demo.launch()