Spaces:
Sleeping
Sleeping
File size: 5,103 Bytes
2d7f359 4c07c1e 2d7f359 4c07c1e 2d7f359 25fd9cd 47757ee 25fd9cd e89cacf 25fd9cd 2d7f359 7ab546e 2d7f359 7ab546e 2d7f359 7ab546e 2d7f359 f873ce7 4c07c1e a28682a 4cf93d3 25fd9cd 4cf93d3 25fd9cd c1965a3 2d7f359 c2ec273 c1965a3 f31f69d 39209e4 c1965a3 39209e4 2d7f359 4c07c1e 39209e4 c1965a3 4c07c1e c1965a3 39209e4 c1965a3 2d7f359 c1965a3 a846510 2d7f359 f873ce7 c1965a3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
"""Gradio chat demo that streams responses from a (local) OpenAI-compatible
endpoint using the official `openai` Python SDK. The server is assumed to be
running at http://0.0.0.0:8000 with the v1 REST routes. A custom header
`X-MIXINPUTS-BETA` is forwarded so MoI can adjust its blending strength at
runtime.

Launch with:
    python app_openai.py
"""
from __future__ import annotations
import os
import openai
import gradio as gr
# ──────────────────────────────────────────────────────────────────────────────
# OpenAI client configuration
# ──────────────────────────────────────────────────────────────────────────────
# ``openai`` still expects an API key even if the backend ignores it, so we use
# a dummy value when none is provided.  The *base_url* points to the local
# vLLM server that speaks the OpenAI REST dialect.
# -----------------------------------------------------------------------------
openai_api_key: str = "EMPTY"  # placeholder — the local backend ignores auth
openai_api_base: str = "http://localhost:8000/v1"  # vLLM OpenAI-compatible route

# Module-level client, reused by every chat request.
client = openai.OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
# ──────────────────────────────────────────────────────────────────────────────
# Chat handler
# ──────────────────────────────────────────────────────────────────────────────
def stream_completion(message: str,
                      history: list[tuple[str, str]],
                      max_tokens: int,
                      temperature: float,
                      top_p: float,
                      beta: float):
    """Gradio callback that yields streaming assistant replies.

    Rebuilds the conversation (excluding any system prompt) from *history*,
    then calls ``client.chat.completions.create`` with ``stream=True``.
    Each incoming delta is appended to an ``assistant`` buffer which is
    re-yielded to the Chatbot component for real-time display.

    Args:
        message: Latest user utterance.
        history: Prior ``(user, assistant)`` turns from the Chatbot.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        beta: MoI blending strength (higher → less blending).

    Yields:
        Updated history list including the partially generated reply.
    """
    # Build OpenAI-style message list from prior turns.
    messages: list[dict[str, str]] = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Current user input comes last.
    messages.append({"role": "user", "content": message})

    # Expose beta to an in-process MoI runtime via the env var, and forward it
    # to a remote server via the custom header — the env var alone cannot
    # reach a separately running server process.
    os.environ["MIXINPUTS_BETA"] = str(beta)

    try:
        # Kick off the streaming completion.
        stream = client.chat.completions.create(
            model="Qwen/Qwen3-4B",
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            stream=True,  # request incremental deltas instead of one blob
            extra_headers={"X-MIXINPUTS-BETA": str(beta)},
        )
        assistant = ""
        for chunk in stream:
            # Deltas may arrive with ``content=None`` (e.g. role-only chunks).
            delta = chunk.choices[0].delta.content or ""
            assistant += delta
            yield history + [(message, assistant)]  # live update
    except Exception as exc:  # UI boundary: surface backend errors in chat
        yield history + [(message, f"[ERROR] {exc}")]
# ──────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ──────────────────────────────────────────────────────────────────────────────
# Top-level UI definition; ``demo`` is launched from the __main__ guard below.
with gr.Blocks(title="🎨 Mixture of Inputs (MoI) Demo") as demo:
    gr.Markdown(
        "## 🎨 Mixture of Inputs (MoI) Demo \n"
        "Streaming vLLM demo with dynamic **beta** adjustment in MoI "
        "(higher beta → less blending)."
    )

    # Generation controls — shown above the chat window.
    with gr.Row():  # sliders first
        beta = gr.Slider(0.0, 10.0, value=1.0, step=0.1, label="MoI β")
        temperature = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=0.80, step=0.05, label="Top-p")
        max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max new tokens")

    chatbot = gr.Chatbot(height=450)
    user_box = gr.Textbox(placeholder="Type a message and press Enter…", show_label=False)
    clear_btn = gr.Button("Clear chat")

    # Pressing Enter in the textbox streams a reply into the chatbot.
    user_box.submit(
        fn=stream_completion,
        inputs=[user_box, chatbot, max_tokens, temperature, top_p, beta],
        outputs=chatbot,
    )
    # Reset the conversation (None clears the Chatbot component).
    clear_btn.click(lambda: None, None, chatbot, queue=False)
# ──────────────────────────────────────────────────────────────────────────────
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|