import os

import gradio as gr
import requests
import spaces

from server import setup_mixinputs, launch_vllm_server

# OpenAI-compatible chat completions endpoint exposed by the local vLLM server.
API_URL = "http://localhost:8000/v1/chat/completions"
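# NOTE: the code below assumes an OpenAI-compatible vLLM server is already
# listening on port 8000 and serving the model named in the payload; the
# commented-out setup_mixinputs()/launch_vllm_server() calls inside
# chat_with_moi() are the alternative in-process setup.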
@spaces.GPU(duration=120)  # reserve a ZeroGPU slot for up to 120 s per call
def chat_with_moi(message, history, temperature, top_p, beta):
    # Set the MIXINPUTS_BETA env var *per request* so the MoI hooks pick it up.
    # (Note: this only affects code running in this process, not a server that
    # was launched separately.)
    os.environ["MIXINPUTS_BETA"] = str(beta)

    # setup_mixinputs()
    # launch_vllm_server(beta=beta)

    payload = {
        "model": "Qwen/QwQ-32B",  # must match the model your vLLM server is serving
        "messages": [{"role": "user", "content": message}],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": 512,
    }

    try:
        response = requests.post(API_URL, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"[ERROR] {e}"
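# --- Optional: streaming variant (a sketch, not wired into the UI below) ---
# Assumes the server implements the OpenAI-style `"stream": true` SSE protocol,
# which vLLM's OpenAI-compatible endpoint supports.
import json

def stream_with_moi(message, temperature, top_p, beta):
    os.environ["MIXINPUTS_BETA"] = str(beta)
    payload = {
        "model": "Qwen/QwQ-32B",
        "messages": [{"role": "user", "content": message}],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": 512,
        "stream": True,  # ask the server for incremental SSE chunks
    }
    with requests.post(API_URL, json=payload, stream=True, timeout=120) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            # SSE lines look like b'data: {...json chunk...}' or b'data: [DONE]'
            if not line.startswith(b"data: "):
                continue
            chunk = line[len(b"data: "):]
            if chunk == b"[DONE]":
                break
            delta = json.loads(chunk)["choices"][0]["delta"]
            yield delta.get("content", "")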
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Mixture of Inputs (MoI) Demo with vLLM")

    with gr.Row():
        temperature = gr.Slider(0.0, 1.5, value=0.7, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
        beta = gr.Slider(0.0, 10.0, value=1.0, label="MoI Beta")

    chatbot = gr.Chatbot()
    message = gr.Textbox(label="Your message")
    send_btn = gr.Button("Send")
    history = gr.State([])

    def respond(user_message, chat_history, temperature, top_p, beta):
        reply = chat_with_moi(user_message, chat_history, temperature, top_p, beta)
        chat_history = chat_history + [(user_message, reply)]
        return chat_history, chat_history

    send_btn.click(
        respond,
        inputs=[message, history, temperature, top_p, beta],
        outputs=[chatbot, history],
    )

demo.launch()
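# Assumed local workflow: start the server first (e.g. `vllm serve Qwen/QwQ-32B
# --port 8000`), then run `python app.py` and open the printed Gradio URL.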