File size: 5,141 Bytes
7cfa5bf 8072750 dbdc900 8072750 dbdc900 7cfa5bf 70d0b73 7cfa5bf a1a8972 8072750 a1a8972 7cfa5bf 8072750 70d0b73 8072750 7cfa5bf 8072750 7cfa5bf 8072750 70d0b73 8072750 a1a8972 e337119 70d0b73 e337119 70d0b73 e337119 0c41d6c 70d0b73 8072750 70d0b73 7cfa5bf 8072750 7cfa5bf dbdc900 7cfa5bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process environment
# before the endpoint settings are read.
load_dotenv()

# Endpoint and bearer token for the chat-completions backend; both are required.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never echo any part of the secret: the previous log line printed the first
# and last 10 characters, which exposes most (or all) of a short token to
# anyone who can read the application logs.
print("[INFO] API_TOKEN: configured (value hidden)")
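"""
Note: `respond` below sends requests to API_URL directly with `requests`; it does not go through `huggingface_hub`.
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""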
# Markers emitted by the model that must not reach the chat window.
_THINK_OPEN = "<think>"
_THINK_CLOSE = "</think>"
_FINAL_ANSWER = "**Final Answer**"

# (connect, read) timeouts in seconds. `requests` applies no timeout by
# default, so without this a stalled backend would block the generator forever.
_REQUEST_TIMEOUT = (10, 300)

# Used when the caller passes no usable `max_tokens` (the value that used to be
# hard-coded in the request payload).
_DEFAULT_MAX_TOKENS = 32768


def _partial_marker_len(text, markers):
    """Return the length of the longest suffix of ``text`` that is a proper
    prefix of one of ``markers`` (0 when there is none)."""
    longest = 0
    for marker in markers:
        upper = min(len(marker) - 1, len(text))
        for size in range(upper, longest, -1):
            if text.endswith(marker[:size]):
                longest = size
                break
    return longest


class _VisibleTextFilter:
    """Incrementally remove ``<think>...</think>`` blocks and the first
    ``**Final Answer**`` marker from streamed text.

    Streamed chunks can cut a marker in half, so any trailing text that could
    still turn into a marker is held back until the next chunk settles it.
    """

    def __init__(self):
        self._pending = ""        # text not yet classified as visible/hidden
        self._in_think = False    # currently inside a <think> block
        self._final_seen = False  # the final-answer marker was already stripped

    def feed(self, chunk):
        """Consume one streamed chunk and return the newly visible text."""
        self._pending += chunk
        visible = []
        while True:
            if self._in_think:
                _, closed, rest = self._pending.partition(_THINK_CLOSE)
                if not closed:
                    # Discard the hidden reasoning, keeping only a tail that
                    # may be the beginning of the closing tag.
                    keep = _partial_marker_len(self._pending, (_THINK_CLOSE,))
                    self._pending = self._pending[len(self._pending) - keep:]
                    break
                self._in_think = False
                self._pending = rest
                continue

            if self._final_seen:
                markers = (_THINK_OPEN,)
            else:
                markers = (_THINK_OPEN, _FINAL_ANSWER)

            hits = []
            for marker in markers:
                pos = self._pending.find(marker)
                if pos != -1:
                    hits.append((pos, marker))

            if not hits:
                # No complete marker: release everything except a possible
                # partial marker at the very end.
                keep = _partial_marker_len(self._pending, markers)
                cut = len(self._pending) - keep
                visible.append(self._pending[:cut])
                self._pending = self._pending[cut:]
                break

            # Handle the earliest marker first; text before it is visible.
            pos, marker = min(hits)
            visible.append(self._pending[:pos])
            self._pending = self._pending[pos + len(marker):]
            if marker == _THINK_OPEN:
                self._in_think = True
            else:
                self._final_seen = True
        return "".join(visible)

    def flush(self):
        """Return held-back text once the stream has ended.

        A held-back tail that never completed a marker is ordinary text; an
        unterminated think block stays hidden.
        """
        tail = "" if self._in_think else self._pending
        self._pending = ""
        return tail


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply for a chat turn.

    Builds the message list (system prompt, prior history, current user
    message), POSTs it to ``API_URL`` with streaming enabled and yields the
    accumulated visible reply each time new text arrives. Text inside
    ``<think>...</think>`` and the first ``**Final Answer**`` marker are
    removed from what is yielded.

    Args:
        message: The current user message.
        history: Earlier messages as ``{"role": ..., "content": ...}`` dicts.
        system_message: Content of the leading system message.
        max_tokens: Generation limit sent to the backend; a falsy value falls
            back to 32768.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling value forwarded to the backend.

    Yields:
        The reply accumulated so far, or a fixed error string when the
        backend answers with a non-200 status or the request fails.
    """
    messages = [{"role": "system", "content": system_message}]
    # Add the conversation history.
    messages.extend(history)
    # Add the current user message.
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1",
        "stream": True,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honour the caller's limit instead of a hard-coded constant.
        "max_tokens": int(max_tokens) if max_tokens else _DEFAULT_MAX_TOKENS,
    }

    print(f"[INFO] process user msg...")
    print(f"[INFO] userMsg: {message}")

    try:
        with requests.post(
            API_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                yield "Service temporarily unavailable"
                return

            current_response = ""
            text_filter = _VisibleTextFilter()

            for raw_line in r.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8')
                if not line.startswith('data: '):
                    continue
                try:
                    payload = json.loads(line[6:])
                except json.JSONDecodeError:
                    # Non-JSON data lines are skipped.
                    continue

                choices = payload.get('choices') or []
                if not choices:
                    continue
                # `delta` may be missing or null; treat both as "no content".
                content = (choices[0].get('delta') or {}).get('content')
                if not content:
                    continue

                visible = text_filter.feed(content)
                if visible:
                    current_response += visible
                    yield current_response

            # Release any text that was held back as a possible partial marker.
            current_response += text_filter.flush()
            # Final yield so the generator always produces at least one value,
            # including the flushed tail.
            yield current_response
            print(f"[INFO] final response: {current_response}")
    except Exception as e:
        print(f"[ERROR] Request error: {e}")
        yield "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls shown under the chat box. Their order must match the trailing
# parameters of `respond`: system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=32768,
    value=16384,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.6,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat UI wired to `respond`; history is exchanged in the "messages" format.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    type="messages",
)


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|