File size: 3,524 Bytes
7cfa5bf 8072750 dbdc900 8072750 dbdc900 7cfa5bf 70d0b73 7cfa5bf a1a8972 8072750 a1a8972 7cfa5bf 8072750 70d0b73 8072750 7cfa5bf 8072750 7cfa5bf 8072750 70d0b73 8072750 a1a8972 70d0b73 a1a8972 70d0b73 0c41d6c 70d0b73 8072750 70d0b73 7cfa5bf 8072750 7cfa5bf dbdc900 7cfa5bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
# Pull settings from a .env file (when present) before reading the environment.
load_dotenv()

# Endpoint that receives the chat request and the bearer token sent with it.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time: nothing below works without both settings.
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never echo any part of the secret. Printing a prefix and suffix exposes 20
# characters of the token, and the whole token when it is 20 characters or
# shorter, to anyone who can read the logs.
print(f"[INFO] API_TOKEN: <set, {len(API_TOKEN)} chars>")
"""
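NOTE: this app does not call `huggingface_hub`; `respond` below streams from the endpoint configured in API_URL using `requests`.
For background on `huggingface_hub` Inference API support, see the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference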
"""
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message`` from the endpoint at API_URL.

    Builds the message list (system prompt, prior turns, current user
    message), POSTs it with ``stream=True`` and parses the ``data: ``-prefixed
    JSON lines of the response, reading ``choices[0].delta.content``.

    Args:
        message: Current user message.
        history: Prior turns as dicts; only their ``role`` and ``content``
            keys are forwarded.
        system_message: Content of the leading system message.
        max_tokens: Generation limit, forwarded as the ``max_tokens`` field.
        temperature: Forwarded as the ``temperature`` field.
        top_p: Forwarded as the ``top_p`` field.

    Yields:
        The accumulated response text after each non-empty content delta, or
        a fixed fallback sentence when the endpoint answers with a non-200
        status or the request raises.
    """
    # (connect, read) seconds. Without a timeout a stalled upstream would
    # block this generator, and the worker serving it, indefinitely.
    request_timeout = (10, 300)

    messages = [{"role": "system", "content": system_message}]
    # Forward only role/content: the UI layer may attach extra keys to history
    # entries that are not part of the request schema.
    messages.extend(
        {key: turn[key] for key in ("role", "content") if key in turn}
        for turn in history or []
    )
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1",
        "stream": True,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honor the caller's limit instead of a hard-coded 32768.
        "max_tokens": int(max_tokens),
    }

    print("[INFO] process user msg...")
    print(f"[INFO] userMsg: {message}")

    try:
        with requests.post(
            API_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=request_timeout,
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                yield "Service temporarily unavailable"
                return

            current_response = ""
            for raw_line in r.iter_lines():
                if not raw_line:
                    continue  # blank separator line between events
                line = raw_line.decode("utf-8")
                if not line.startswith("data: "):
                    continue
                payload = line[6:]
                # End-of-stream sentinel; it is not JSON, so stop here rather
                # than logging a decode error on every request.
                if payload.strip() == "[DONE]":
                    break
                try:
                    json_response = json.loads(payload)
                except json.JSONDecodeError as e:
                    print(f"[ERROR] JSON decode error: {e}")
                    continue
                if not isinstance(json_response, dict):
                    continue
                choices = json_response.get("choices")
                if not choices:
                    continue
                # A null delta must not abort a partially streamed answer.
                delta = choices[0].get("delta") or {}
                content = delta.get("content")
                if content:
                    current_response += content
                    yield current_response
    except Exception as e:
        # UI boundary: report a generic message instead of surfacing a traceback.
        print(f"[ERROR] Request error: {e}")
        yield "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI. Their order must line up with respond()'s
# parameters after (message, history): system_message, max_tokens,
# temperature, top_p.
_system_message_box = gr.Textbox(
    label="System message",
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=32768,
    step=1,
    value=16384,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    minimum=0.1,
    maximum=4.0,
    step=0.1,
    value=0.6,
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    step=0.05,
    value=0.95,
)

# Chat front end: respond() produces the streamed replies, and history is
# exchanged as role/content message dicts (type="messages").
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    type="messages",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|