Yyyy / app.py
Athspi's picture
Rename app to app.py
45ca121 verified
raw
history blame
1.32 kB
import gradio as gr
from onnxruntime_genai import ChatSession, GenerationConfig, ORTModel
# Relative path to the directory holding the downloaded ONNX model files
# (presumably the CPU int4 build, going by the directory name).
MODEL_DIR = "model/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
# Load the model once at import time with the "cpu" execution provider and
# wrap it in a single module-level chat session used by the chat handler.
# NOTE(review): confirm that the installed onnxruntime_genai release really
# exports ORTModel / ChatSession / GenerationConfig with these signatures.
model = ORTModel(MODEL_DIR, execution_provider="cpu")
session = ChatSession(model)
# Chat function
def chat_with_phi4(message, history):
    """Send ``message`` to the shared chat session and record the exchange.

    Args:
        message: Text the user entered in the textbox.
        history: Prior ``(user_message, reply)`` pairs kept in the Gradio
            state, or ``None``/empty for a fresh conversation.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the state component.
    """
    # Work on a copy so the caller's list is never mutated in place.
    turns = list(history or [])

    # Nothing to send: leave the conversation untouched instead of prompting
    # the model with an empty message.
    if not message or not message.strip():
        return turns, turns

    # The session is module-level and outlives each call, so appending the
    # whole UI history on every turn would duplicate earlier exchanges.
    # Rebuild the session history from the UI state instead; this also keeps
    # the model in step with the UI after the conversation is cleared.
    # NOTE(review): assumes session.history is a plain mutable list (the
    # previous code already relied on ``.append``) - confirm.
    session.history.clear()
    session.history.extend((turn[0], turn[1]) for turn in turns)

    reply = session.chat(message, config=GenerationConfig(max_new_tokens=300))
    turns.append((message, reply))
    return turns, turns
# Hugging Face logo shown at the top of the page
HF_LOGO = "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"


def _reset_chat():
    """Forget the conversation and return empty chatbot/state values."""
    # Clearing only the UI components would leave earlier turns inside the
    # shared module-level session, so the model would still see them.
    # NOTE(review): assumes session.history is a plain mutable list (the chat
    # handler already relies on ``.append``) - confirm.
    session.history.clear()
    return [], []


# Gradio interface
with gr.Blocks() as demo:
    gr.Image(value=HF_LOGO, width=120, show_label=False, show_download_button=False)
    gr.Markdown("### Chat with Microsoft Phi-4 Mini Instruct (ONNX)")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Your message")
    # Per-conversation list of (user_message, reply) pairs.
    state = gr.State([])
    send_btn = gr.Button("Send")
    clear_btn = gr.Button("Clear")

    # The Send button and pressing Enter in the textbox run the same handler.
    send_btn.click(chat_with_phi4, [user_input, state], [chatbot, state])
    user_input.submit(chat_with_phi4, [user_input, state], [chatbot, state])
    clear_btn.click(_reset_chat, outputs=[chatbot, state])

demo.launch()