zhaozengxi committed on
Commit
f908b7d
·
verified ·
1 Parent(s): 0ab87e1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -22
app.py CHANGED
@@ -1,28 +1,59 @@
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- # Replace with the model you want to use.
5
- client = InferenceClient(model="Qwen/Qwen1.5-7B-Chat")
 
6
 
7
def predict(message, history):
    """Stream the assistant's reply, yielding the text accumulated so far.

    Args:
        message: Text of the new user turn.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.

    Yields:
        The reply text received up to that point; each yielded value
        extends the previous one.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for human, bot in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        model="Qwen/Qwen1.5-7B-Chat",
        messages=messages,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    )

    partial_text = ""
    for chunk in stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if not delta or "content" not in delta:
            continue
        piece = delta["content"]
        # The key can be present with a None value (e.g. role-only or final
        # chunks); concatenating None would raise TypeError mid-stream.
        if piece:
            partial_text += piece
            yield partial_text
23
-
24
# Chat UI built around ``predict``; the retry button is disabled and the
# undo/clear buttons carry Chinese labels.
chat_interface = gr.ChatInterface(
    fn=predict,
    title="🌟 Streaming Chat with Qwen",
    examples=[
        "介绍一下你自己",
        "讲一个冷笑话",
        "什么是人工智能?",
    ],
    retry_btn=None,
    undo_btn="撤回",
    clear_btn="清空",
)

# Enable queuing, then start the app.
chat_interface.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
+ # Read the Hugging Face token from the HF_TOKEN environment variable instead of hard-coding it (None when unset).
6
+ HF_TOKEN = os.getenv("HF_TOKEN")
7
+ MODEL_NAME = "Qwen/Qwen3-235B-A22B"
8
 
9
+ # Module-level inference client bound to MODEL_NAME; chat_fn sends its requests through it.
+ client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)
10
+
11
def chat_fn(message, history, system_message, temperature, top_p, max_tokens, repetition_penalty):
    """Send one user turn to the model and return the updated conversation.

    Args:
        message: Text of the new user turn.
        history: Earlier turns as ``(user_msg, bot_msg)`` pairs; ``None`` is
            treated as an empty conversation.
        system_message: Optional system prompt; omitted from the request
            when empty.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        max_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Repetition penalty forwarded to the provider.

    Returns:
        A pair ``(history, history)``: the same updated list twice, one for
        the chatbot display and one for the session state.
    """
    # Work on a copy so the caller's state list is never mutated in place.
    turns = list(history or [])

    # Nothing to send: skip the API call and leave the conversation as is.
    if not message or not message.strip():
        return turns, turns

    # Build the request messages: system prompt, prior turns, new user turn.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_msg, bot_msg in turns:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # chat_completion() has no ``repetition_penalty`` keyword, so passing it
    # directly raises TypeError. Provider-specific parameters go through
    # ``extra_body`` (needs a huggingface_hub release that supports it).
    response = client.chat_completion(
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=int(max_tokens),
        extra_body={"repetition_penalty": repetition_penalty},
    )

    # The content field may be None; show an empty reply rather than "None".
    answer = response.choices[0].message["content"] or ""
    turns.append((message, answer))
    return turns, turns
33
+
34
def _reset_conversation():
    """Return empty values for the chatbot display and the session state."""
    return [], []


# Two-column page: the conversation on the left, generation settings on the right.
with gr.Blocks(theme=gr.themes.Base(), css="footer {display: none !important}") as demo:
    gr.Markdown("# 🤖 Qwen3 Chatbot")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            message = gr.Textbox(
                label="Your Message",
                placeholder="Type something...",
                scale=4,
            )
            submit = gr.Button("Send", variant="primary")
            clear = gr.Button("Clear")

            # Conversation history, starting empty.
            state = gr.State([])

        with gr.Column(scale=1):
            system_message = gr.Textbox(
                label="System Prompt",
                placeholder="You are a helpful assistant.",
            )
            temperature = gr.Slider(0, 1, value=0.7, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max Tokens")
            repetition_penalty = gr.Slider(
                1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty"
            )

    # The Send button and pressing Enter in the textbox trigger the same handler.
    chat_inputs = [
        message,
        state,
        system_message,
        temperature,
        top_p,
        max_tokens,
        repetition_penalty,
    ]
    chat_outputs = [chatbot, state]

    submit.click(chat_fn, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(chat_fn, inputs=chat_inputs, outputs=chat_outputs)
    clear.click(_reset_conversation, inputs=None, outputs=chat_outputs)

demo.launch()