import gradio as gr
import requests
import os
import spaces  # HF Spaces SDK; imported for the Space runtime, not used directly below
from server import setup_mixinputs, launch_vllm_server

# OpenAI-compatible chat completions endpoint exposed by the local vLLM server
API_URL = "http://localhost:8000/v1/chat/completions"

def chat_with_moi(message, history, temperature, top_p, beta):
    # Set the MIXINPUTS_BETA env var per request. Note that this only
    # updates this Gradio process's environment; a separately launched
    # vLLM server sees the value only if it reads it itself (e.g., at
    # startup via launch_vllm_server below).
    os.environ["MIXINPUTS_BETA"] = str(beta)
    # setup_mixinputs()
    # launch_vllm_server(beta=beta)

    # Forward the prior turns so the model sees the conversation context;
    # history arrives as a list of (user, assistant) tuples from the Chatbot.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "Qwen/QwQ-32B",  # must match the model your vLLM server is serving
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": 512,
    }
    try:
        response = requests.post(API_URL, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"[ERROR] {e}"
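
# The endpoint follows the OpenAI chat-completions schema, so a successful
# response is expected to look roughly like (a sketch, not captured output):
#
#   {
#     "choices": [
#       {"index": 0, "message": {"role": "assistant", "content": "..."}}
#     ],
#     ...
#   }
#
# which is why the reply above is read from choices[0].message.content.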

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Mixture of Inputs (MoI) Demo with vLLM")

    with gr.Row():
        temperature = gr.Slider(0.0, 1.5, value=0.7, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
        beta = gr.Slider(0.0, 10.0, value=1.0, label="MoI Beta")

    chatbot = gr.Chatbot()
    message = gr.Textbox(label="Your message")
    send_btn = gr.Button("Send")
    history = gr.State([])  # list of (user, assistant) tuples

    def respond(user_message, chat_history, temperature, top_p, beta):
        reply = chat_with_moi(user_message, chat_history, temperature, top_p, beta)
        chat_history = chat_history + [(user_message, reply)]
        # Return the updated history twice: once to render in the Chatbot,
        # once to persist in the State component.
        return chat_history, chat_history

    send_btn.click(
        respond,
        inputs=[message, history, temperature, top_p, beta],
        outputs=[chatbot, history],
    )

demo.launch()
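
# Usage note: this client assumes a vLLM OpenAI-compatible server is already
# listening on localhost:8000. A typical way to start one (assuming the
# MoI-patched vLLM that server.py configures is installed) would be:
#
#   MIXINPUTS_BETA=1.0 vllm serve Qwen/QwQ-32B --port 8000
#
# This command is a sketch of the expected setup, not taken from this repo;
# alternatively, uncomment setup_mixinputs()/launch_vllm_server() above.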