File size: 3,524 Bytes
7cfa5bf
8072750
 
 
 
 
 
 
 
 
 
 
dbdc900
8072750
 
 
dbdc900
7cfa5bf
 
 
 
 
 
 
70d0b73
7cfa5bf
 
 
 
 
 
 
a1a8972
8072750
 
a1a8972
7cfa5bf
 
8072750
 
 
 
 
 
 
70d0b73
8072750
 
 
 
 
 
 
7cfa5bf
8072750
 
7cfa5bf
8072750
70d0b73
8072750
a1a8972
70d0b73
 
 
 
 
 
 
 
 
 
 
a1a8972
 
70d0b73
 
 
0c41d6c
 
70d0b73
8072750
 
70d0b73
7cfa5bf
 
 
 
 
 
 
 
8072750
 
 
7cfa5bf
 
 
 
 
 
 
 
dbdc900
7cfa5bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Pull settings from a local .env file (when one exists) into the process
# environment before they are read below.
load_dotenv()

API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time and say exactly which setting is absent, so a
# misconfigured deployment is diagnosable from the traceback alone.
_missing_settings = [
    name
    for name, value in (("API_URL", API_URL), ("API_TOKEN", API_TOKEN))
    if not value
]
if _missing_settings:
    raise ValueError(
        f"invalid API_URL || API_TOKEN (missing: {', '.join(_missing_settings)})"
    )

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Do not echo any characters of the secret: head/tail slices expose a large
# part of the token, and the entire token whenever it is 20 chars or shorter.
print(f"[INFO] API_TOKEN: set ({len(API_TOKEN)} chars, value hidden)")

"""
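NOTE: this app streams completions from the endpoint configured in API_URL via `requests`; it does not call `huggingface_hub` itself. For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference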
"""

# (connect, read) timeouts in seconds for the upstream request. With a
# streamed response the read timeout bounds the wait for each chunk, not the
# total generation time, so long answers are still allowed.
_REQUEST_TIMEOUT = (10, 300)

# Completion budget used when the caller supplies no usable max_tokens value
# (this is the value that used to be hard-coded in the request).
_FALLBACK_MAX_TOKENS = 32768


def _extract_delta_content(payload):
    """Return the non-empty text fragment in one streamed JSON chunk, else None.

    Looks up ``choices[0].delta.content`` in the decoded JSON and tolerates
    missing, null or wrongly-typed levels by returning ``None`` rather than
    raising, so a single odd chunk cannot abort the whole stream.
    """
    try:
        chunk = json.loads(payload)
    except json.JSONDecodeError as e:
        print(f"[ERROR] JSON decode error: {e}")
        return None

    if not isinstance(chunk, dict):
        return None
    choices = chunk.get("choices")
    if not isinstance(choices, list) or not choices:
        return None
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return None
    delta = first_choice.get("delta")
    if not isinstance(delta, dict):
        return None
    content = delta.get("content")
    if isinstance(content, str) and content:
        return content
    return None


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply for a chat turn.

    Builds a message list (system prompt, prior history, current user
    message), POSTs it to ``API_URL`` with ``stream=True`` and parses the
    ``data:``-prefixed lines of the response.

    Args:
        message: The current user message.
        history: Earlier turns, as message dicts appended to the request as-is.
        system_message: Content of the leading ``system`` message.
        max_tokens: Upper bound on generated tokens sent to the API; falls
            back to ``_FALLBACK_MAX_TOKENS`` when empty or zero.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Yields:
        The accumulated response text after each received fragment. On a
        non-200 status yields ``"Service temporarily unavailable"``; on any
        exception during the request yields ``"Service error occurred"``.
    """
    messages = [{"role": "system", "content": system_message}]

    # Add prior conversation turns.
    messages.extend(history)

    # Add the current user message.
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }

    data = {
        "model": "/data/DMind-1",
        "stream": True,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honour the caller's limit instead of a hard-coded constant; int()
        # because UI sliders may deliver the value as a float.
        "max_tokens": int(max_tokens) if max_tokens else _FALLBACK_MAX_TOKENS,
    }

    print("[INFO] process user msg...")
    print(f"[INFO] userMsg: {message}")

    try:
        with requests.post(
            API_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=_REQUEST_TIMEOUT,  # never block forever on a dead upstream
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                yield "Service temporarily unavailable"
                return

            current_response = ""
            for raw_line in r.iter_lines():
                if not raw_line:
                    continue  # blank separator lines between events
                line = raw_line.decode("utf-8")
                if not line.startswith("data:"):
                    continue  # only data fields carry completion chunks
                payload = line[len("data:"):].strip()
                if payload == "[DONE]":
                    # End-of-stream sentinel; it is not JSON, so stop here
                    # rather than reporting a spurious decode error.
                    break
                content = _extract_delta_content(payload)
                if content:
                    current_response += content
                    yield current_response
    except Exception as e:
        # UI boundary: log the cause and show a generic message to the user.
        print(f"[ERROR] Request error: {e}")
        yield "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI. Their order in `additional_inputs` below
# matches the parameters of `respond` that follow (message, history):
# system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=32768,
    value=16384,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.6,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat front end: history is exchanged as role/content message dicts.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    type="messages",
)


# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()