File size: 3,553 Bytes
7cfa5bf
8072750
 
 
 
 
 
 
 
 
 
 
 
 
dbdc900
8072750
 
 
dbdc900
7cfa5bf
 
 
 
 
 
 
8072750
7cfa5bf
 
 
 
 
 
 
8072750
 
 
 
7cfa5bf
 
8072750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfa5bf
8072750
 
7cfa5bf
8072750
 
0c41d6c
8072750
 
0c41d6c
8072750
 
0c41d6c
8072750
dbdc900
 
0c41d6c
 
 
8072750
0c41d6c
 
 
 
8072750
 
 
 
7cfa5bf
 
 
 
 
 
 
 
8072750
 
 
7cfa5bf
 
 
 
 
 
 
 
dbdc900
7cfa5bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables from the .env file (if one is present).
load_dotenv()

# Read the backend endpoint and its bearer token from the environment.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time: the app cannot serve any request without both.
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never write any part of the secret to the logs: slicing the head and tail
# exposes 20 characters of the token, and the whole token when it is short.
print(f"[INFO] API_TOKEN: set ({len(API_TOKEN)} chars)")

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# (connect, read) timeouts in seconds for the backend call. The read timeout
# is generous because the request is non-streaming, so the server only answers
# once the whole completion has been generated.
_REQUEST_TIMEOUT = (10, 600)


def _extract_answer(content):
    """Return the user-visible part of a raw model completion.

    Drops everything up to the last ``</think>`` tag when the completion
    contains a ``<think>...</think>`` section, then keeps only the text after
    the last ``**Final Answer**`` marker when that marker is present.
    """
    if '<think>' in content and '</think>' in content:
        content = content.split('</think>')[-1].strip()
    if '**Final Answer**' in content:
        content = content.split('**Final Answer**')[-1].strip()
    return content


def respond(
    message,
    history: list[dict],  # prior turns in the "messages" format
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the chat-completions backend and return its reply.

    Args:
        message: The current user message.
        history: Previous turns as dicts carrying ``role`` and ``content``.
        system_message: Text sent as the leading ``system`` message.
        max_tokens: Upper bound on generated tokens, forwarded to the backend.
        temperature: Sampling temperature, forwarded to the backend.
        top_p: Nucleus-sampling threshold, forwarded to the backend.

    Returns:
        The cleaned assistant reply, ``"Service temporarily unavailable"``
        when the backend answers without usable content or with a non-200
        status, or ``"Service error occurred"`` when the request itself fails
        (including a timeout).
    """
    messages = [{"role": "system", "content": system_message}]

    # Forward only role/content from earlier turns; any extra per-message keys
    # added by the UI layer are not part of the chat-completions schema.
    messages.extend(
        {"role": turn.get("role"), "content": turn.get("content")}
        for turn in (history or [])
    )

    # Append the current user message.
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}"
    }

    data = {
        "model": "/data/DMind-1",
        "stream": False,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honour the caller-supplied limit instead of a hard-coded 32768.
        "max_tokens": int(max_tokens)
    }

    print(f"[INFO] process user msg...")
    print(f"[INFO] userMsg: {message}")

    try:
        # Without a timeout a stalled backend would block this worker forever.
        with requests.post(
            API_URL, headers=headers, json=data, timeout=_REQUEST_TIMEOUT
        ) as r:
            print(f"[INFO] response status: {r.status_code}")
            if r.status_code == 200:
                json_response = r.json()
                print(f"[INFO] response json: {json_response}")
                if 'choices' in json_response and len(json_response['choices']) > 0:
                    # `or {}` also covers an explicit `"message": null`.
                    reply = json_response['choices'][0].get('message') or {}
                    content = reply.get('content', '')
                    print(f"[INFO] response content: {content}")
                    if content:
                        content = _extract_answer(content)
                        print(f"[INFO] final response: {content}")
                        return content
                else:
                    print(f"[ERROR] No choices in response: {json_response}")
            else:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
            return "Service temporarily unavailable"
    except Exception as e:
        # UI boundary: report any failure (network, timeout, malformed JSON)
        # as a chat message instead of crashing the handler.
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls shown alongside the chat box. Their values are handed to
# `respond` after (message, history), in the order listed in
# `additional_inputs` below.
_system_message_box = gr.Textbox(
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=32768,
    value=16384,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.6,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat UI using the "messages" history format (a list of role/content dicts).
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    type="messages",
)


# Start the Gradio app only when this file is run as a script, so importing
# the module does not launch it.
if __name__ == "__main__":
    demo.launch()