import os

import gradio as gr
from openai import OpenAI

# Pick up secrets configured on the HF Space
BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
API_KEY = os.getenv("HF_TOKEN")
MODEL_ID = "kaizen9/qsft_30_6000_v2"

# The endpoint exposes an OpenAI-compatible API under /v1
client = OpenAI(
    base_url=f"{BASE}/v1",
    api_key=API_KEY,
)


def build_messages(history, user_msg, system_msg):
    """Convert Gradio's (user, assistant) history pairs into OpenAI chat messages."""
    msgs = []
    if system_msg.strip():
        msgs.append({"role": "system", "content": system_msg.strip()})
    for u, a in history:
        if u:
            msgs.append({"role": "user", "content": u})
        if a:
            msgs.append({"role": "assistant", "content": a})
    msgs.append({"role": "user", "content": user_msg})
    return msgs


def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
    """Stream a chat completion, yielding the growing partial response."""
    msgs = build_messages(history, message, system_message)
    stream = client.chat.completions.create(
        model=MODEL_ID,
        messages=msgs,
        temperature=float(temperature),
        top_p=float(top_p),
        max_tokens=int(max_tokens),
        stream=True,
    )
    partial = ""
    for chunk in stream:
        # Some stream chunks carry no choices (e.g. usage-only chunks); skip them.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta and delta.content:
            partial += delta.content
            yield partial


with gr.Blocks() as demo:
    gr.Markdown("# QSFT Chat UI")
    system_box = gr.Textbox(
        label="System prompt",
        value="You are a helpful assistant.",
        lines=2,
    )
    temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
    topp = gr.Slider(0.0, 1.0, 0.95, step=0.01, label="Top-p")
    maxt = gr.Slider(16, 4096, 512, step=16, label="Max tokens")
    # Retry/undo controls are shown by default. The retry_btn/undo_btn kwargs
    # expect a label string (or None) in Gradio 4 and were removed in Gradio 5,
    # so passing True is omitted here.
    gr.ChatInterface(
        fn=chat_fn,
        additional_inputs=[system_box, temp, topp, maxt],
    )

if __name__ == "__main__":
    demo.launch()