"""Gradio chat UI that talks to a Hugging Face hosted chat model."""

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Read the Hugging Face token from the environment instead of hard-coding it.
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_NAME = "Qwen/Qwen3-235B-A22B"

client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)


def _build_messages(message, history, system_message):
    """Convert the UI state into the role/content message list sent to the model."""
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def chat_fn(message, history, system_message, temperature, top_p, max_tokens, repetition_penalty):
    """Send one user turn to the model and return the updated conversation.

    Args:
        message: Text typed by the user for this turn.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        system_message: Optional system prompt; skipped when empty.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        max_tokens: Upper bound on generated tokens.
        repetition_penalty: Repetition penalty forwarded as a provider-specific
            extra body field.

    Returns:
        A ``(history, history)`` pair: the first value feeds the chatbot
        component, the second the session state.
    """
    # Work on a copy so the list held by gr.State is never mutated in place.
    history = list(history or [])

    # Nothing to send: leave the conversation untouched instead of calling the API.
    if not message or not message.strip():
        return history, history

    messages = _build_messages(message, history, system_message)

    # Query the model. `chat_completion` has no `repetition_penalty` keyword, so
    # the value goes through `extra_body`.
    # NOTE(review): needs a huggingface_hub release that supports `extra_body`;
    # confirm the selected provider honours `repetition_penalty`.
    response = client.chat_completion(
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=int(max_tokens),
        extra_body={"repetition_penalty": repetition_penalty},
    )

    # `content` may be None; fall back to an empty string for display.
    answer = response.choices[0].message.content or ""
    history.append((message, answer))
    return history, history


with gr.Blocks(theme=gr.themes.Base(), css="footer {display: none !important}") as demo:
    gr.Markdown("# 🤖 Qwen3 Chatbot")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            message = gr.Textbox(label="Your Message", placeholder="Type something...", scale=4)
            submit = gr.Button("Send", variant="primary")
            clear = gr.Button("Clear")
            state = gr.State([])

        with gr.Column(scale=1):
            system_message = gr.Textbox(label="System Prompt", placeholder="You are a helpful assistant.")
            temperature = gr.Slider(0, 1, value=0.7, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max Tokens")
            repetition_penalty = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")

    # Both the button and the Enter key trigger the same handler with the same wiring.
    chat_inputs = [message, state, system_message, temperature, top_p, max_tokens, repetition_penalty]
    chat_outputs = [chatbot, state]

    # After each send, empty the input box so the next message starts fresh.
    submit.click(chat_fn, inputs=chat_inputs, outputs=chat_outputs).then(lambda: "", None, message)
    message.submit(chat_fn, inputs=chat_inputs, outputs=chat_outputs).then(lambda: "", None, message)

    # Reset both the visible chat and the stored history.
    clear.click(lambda: ([], []), None, [chatbot, state])

demo.launch()