# server / app.py
# RedMod
# a
# 3149dc0
import os
import gradio as gr
from openai import OpenAI
# Connection settings are read from the environment once, at import time.
# A trailing slash on the endpoint URL is dropped so "/v1" can be appended
# without producing a double slash.
BASE = os.environ.get("HF_ENDPOINT_URL", "").rstrip("/")
API_KEY = os.environ.get("HF_TOKEN")
MODEL_ID = os.environ.get("MODEL_ID", "RedMod/mangrove_30b_a3b_sft_step_6000")

# Fail fast when either the endpoint URL or the token is missing/empty.
if not (BASE and API_KEY):
    raise RuntimeError("Set HF_ENDPOINT_URL and HF_TOKEN in Settings → Repository secrets.")

# Shared client used by chat_fn for every request.
client = OpenAI(api_key=API_KEY, base_url=BASE + "/v1")
def build_messages(history, user_msg, system_msg):
    """Assemble the chat-completions ``messages`` list for one request.

    Args:
        history: Earlier turns of the conversation. Each entry is either a
            ``(user, assistant)`` pair (either side may be empty/``None``)
            or a dict carrying ``"role"`` and ``"content"`` keys. ``None``
            is treated as an empty history.
        user_msg: The new user message; always appended last.
        system_msg: Optional system prompt. Skipped when ``None``, empty or
            whitespace-only; otherwise stored stripped.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts ordered as:
        system prompt (if any), history turns, new user message.
    """
    msgs = []
    if system_msg and system_msg.strip():
        msgs.append({"role": "system", "content": system_msg.strip()})

    for turn in history or ():
        if isinstance(turn, dict):
            # Dict-style entry: forward only role/content so extra keys
            # (e.g. UI metadata) are not sent to the endpoint.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                msgs.append({"role": role, "content": content})
            continue

        # Pair-style entry: (user text, assistant text).
        u, a = turn
        if u:
            msgs.append({"role": "user", "content": u})
        if a:
            msgs.append({"role": "assistant", "content": a})

    msgs.append({"role": "user", "content": user_msg})
    return msgs
def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
    """Stream a model reply for the chat UI.

    Builds the request messages with ``build_messages``, opens a streaming
    chat completion against ``MODEL_ID`` and yields the reply text as it
    grows.

    Args:
        message: The new user message.
        history: Earlier conversation turns, passed through to
            ``build_messages``.
        system_message: System prompt text (may be empty).
        temperature: Sampling temperature; coerced with ``float``.
        top_p: Nucleus-sampling cutoff; coerced with ``float``.
        max_tokens: Completion length limit; coerced with ``int``.

    Yields:
        The accumulated reply text after each chunk that carries content.
    """
    msgs = build_messages(history, message, system_message)
    stream = client.chat.completions.create(
        model=MODEL_ID,
        messages=msgs,
        temperature=float(temperature),
        top_p=float(top_p),
        max_tokens=int(max_tokens),
        stream=True,
    )
    partial = ""
    try:
        for chunk in stream:
            # A chunk may arrive with an empty ``choices`` list (for example
            # a usage-only chunk); indexing [0] on it would raise IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                partial += delta.content
                yield partial
    finally:
        # Runs on normal completion, on errors, and when the consumer stops
        # the generator early, so the underlying response is released.
        close = getattr(stream, "close", None)
        if callable(close):
            close()
# UI layout: a system-prompt box and a row of sampling sliders, followed by
# the chat interface that feeds their current values to chat_fn.
with gr.Blocks(title="Mangrove Demo") as demo:
    system_box = gr.Textbox(
        value="You are a helpful assistant.",
        label="System prompt",
        lines=2,
    )

    with gr.Row():
        temp = gr.Slider(
            minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
        )
        topp = gr.Slider(
            minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top-p"
        )
        maxt = gr.Slider(
            minimum=16, maximum=4096, value=512, step=16, label="Max tokens"
        )

    # The order of additional_inputs matches chat_fn's trailing parameters:
    # system_message, temperature, top_p, max_tokens.
    gr.ChatInterface(
        fn=chat_fn,
        multimodal=False,
        submit_btn="Send",
        stop_btn="Stop",
        additional_inputs=[system_box, temp, topp, maxt],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()