reformat
app.py CHANGED

@@ -1,74 +1,81 @@
-
 import requests
 import sseclient
-import
 
 API_URL = "http://localhost:8000/v1/chat/completions"
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-        messages.append({"role": "user", "content": user})
-        if assistant:
-            messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
 
-    # Prepare request payload
     payload = {
-        "model": "Qwen/Qwen3-4B",
         "messages": messages,
         "temperature": temperature,
         "top_p": top_p,
-        "max_tokens": max_tokens,
         "stream": True,
     }
-
-    # Optional: send beta as a custom OpenAI field
     headers = {
         "Content-Type": "application/json",
-        "X-MIXINPUTS-BETA": str(beta),
     }
 
-    # Stream response using SSE (Server-Sent Events)
     try:
-
-
-        client = sseclient.SSEClient(
 
-
         for event in client.events():
-            if event.data == "[DONE]":
                 break
-            delta = event.
-
-            yield
 
-
-
 
-    #
-
-
-
-
-
-
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-        gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.1, label="MoI Beta"),
-    ],
-    title="🧪 Mixture of Inputs (MoI) Demo",
-    description="Streaming local vLLM demo with dynamic MoI beta adjustment.",
-)
 
 if __name__ == "__main__":
-    demo.launch()
+# app.py
+import json
 import requests
 import sseclient
+import gradio as gr
 
 API_URL = "http://localhost:8000/v1/chat/completions"
 
+
+def stream_completion(message, history, max_tokens, temperature, top_p, beta):
+    """
+    Gradio callback: takes the newest user message + full chat history,
+    returns an updated history while streaming assistant tokens.
+    """
+    # ------- build OpenAI-style message list (no system prompt) -------------
+    messages = []
+    for usr, bot in history:
+        if usr:
+            messages.append({"role": "user", "content": usr})
+        if bot:
+            messages.append({"role": "assistant", "content": bot})
     messages.append({"role": "user", "content": message})
 
     payload = {
+        "model": "Qwen/Qwen3-4B",
         "messages": messages,
         "temperature": temperature,
         "top_p": top_p,
+        "max_tokens": int(max_tokens),
         "stream": True,
     }
     headers = {
         "Content-Type": "application/json",
+        "X-MIXINPUTS-BETA": str(beta),
     }
 
     try:
+        resp = requests.post(API_URL, json=payload, stream=True, headers=headers, timeout=60)
+        resp.raise_for_status()
+        client = sseclient.SSEClient(resp)
 
+        assistant = ""
         for event in client.events():
+            if event.data.strip() == "[DONE]":
                 break
+            delta = json.loads(event.data)["choices"][0]["delta"].get("content", "")
+            assistant += delta
+            yield history + [(message, assistant)]  # update the chat box live
+
+    except Exception as err:
+        yield history + [(message, f"[ERROR] {err}")]
+
+
+# ----------------------- UI ---------------------------------------------
+with gr.Blocks(title="🧪 Mixture of Inputs (MoI) Demo") as demo:
+    gr.Markdown(
+        "## 🧪 Mixture of Inputs (MoI) Demo \n"
+        "Streaming local vLLM demo with dynamic **beta** adjustment."
+    )
+
+    # sliders first – all on one row
+    with gr.Row():
+        max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max new tokens")
+        temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
+        top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
+        beta = gr.Slider(0.0, 10.0, value=1.0, step=0.1, label="MoI Beta")
 
+    chatbot = gr.Chatbot(height=450)
+    user_box = gr.Textbox(placeholder="Type a message and press Enter…", show_label=False)
+    clear_btn = gr.Button("Clear chat")
 
+    # wiring
+    user_box.submit(
+        stream_completion,
+        inputs=[user_box, chatbot, max_tokens, temperature, top_p, beta],
+        outputs=chatbot,
+    )
+    clear_btn.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
+    demo.launch()
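
The rewritten callback assumes an OpenAI-compatible chat-completions server on localhost:8000 that honors the custom X-MIXINPUTS-BETA header (e.g. a vLLM instance patched for Mixture of Inputs). A minimal smoke test of that assumption, independent of Gradio — the URL, model name, and header are taken from app.py above, while the non-streaming "stream": False round trip is assumed to work on this server rather than shown by the diff:

# smoke_test.py -- quick check that the backend answers before wiring up the UI.
# Endpoint, model, and header mirror app.py; "stream": False is the standard
# OpenAI-compatible non-streaming mode and is an assumption about this server.
import requests

API_URL = "http://localhost:8000/v1/chat/completions"

payload = {
    "model": "Qwen/Qwen3-4B",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 32,
    "stream": False,
}
headers = {
    "Content-Type": "application/json",
    "X-MIXINPUTS-BETA": "1.0",  # same custom header the demo sends per request
}

resp = requests.post(API_URL, json=payload, headers=headers, timeout=60)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])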
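
The streaming loop's one-liner, json.loads(event.data)["choices"][0]["delta"].get("content", ""), assumes every SSE data: payload is an OpenAI-style chunk with a non-empty choices list. A sketch of a slightly more defensive parse; the chunk layout in the comment is the standard OpenAI streaming format, assumed (not verified here) to match what the local server emits:

import json

# Assumed chunk shape (standard OpenAI streaming format):
# {"object": "chat.completion.chunk",
#  "choices": [{"index": 0, "delta": {"content": "Hel"}, "finish_reason": null}]}
def extract_delta(data: str) -> str:
    """Return the text delta from one SSE payload, tolerating role-only,
    empty, or choices-free chunks (some servers send a final usage chunk)."""
    chunk = json.loads(data)
    choices = chunk.get("choices") or []
    if not choices:
        return ""
    return choices[0].get("delta", {}).get("content") or ""

Swapping this in for the inline json.loads(...) line would leave the rest of the UI loop unchanged.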