File size: 7,194 Bytes
7cfa5bf 8072750 dbdc900 8072750 dbdc900 7cfa5bf 70d0b73 7cfa5bf 8072750 7cfa5bf 8072750 70d0b73 8072750 7cfa5bf 8072750 70d0b73 8072750 a1a8972 70d0b73 543c6f8 aec6f8a 543c6f8 942e73f 543c6f8 942e73f 543c6f8 942e73f 543c6f8 e337119 70d0b73 543c6f8 942e73f 543c6f8 942e73f 543c6f8 942e73f 543c6f8 0c41d6c 70d0b73 8072750 70d0b73 7cfa5bf 8072750 7cfa5bf 9852371 ada8ad0 9852371 ada8ad0 9852371 942e73f 9852371 7cfa5bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
# Pull settings from a local .env file (python-dotenv) before reading them
# from the process environment.
load_dotenv()

# Endpoint that chat requests are POSTed to, and the bearer token sent with them.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time: the app cannot serve any request without both values.
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never echo any part of the secret: head/tail slices leak 20 characters of the
# token and reveal it entirely when it is 20 characters or shorter.
print(f"[INFO] API_TOKEN: ******** ({len(API_TOKEN)} chars)")
"""
The link below documents `huggingface_hub` Inference API support. Note that the code in this file does not use `huggingface_hub`; `respond` posts to API_URL with `requests` instead: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Label shown on the collapsible reasoning section ("thinking process").
_THINK_SUMMARY = "思考过程"

# Model markers rewritten before display, applied in this order.
_MARKUP_SUBSTITUTIONS = (
    ("<think>", f"<details open><summary>{_THINK_SUMMARY}</summary>\n\n```"),
    ("</think>", "```\n\n</details>"),
    ("**Final Answer**", ""),
)

# The only HTML tags allowed to reach the chat window unescaped.
_ALLOWED_TAGS = ("<details open>", "</details>", "<summary>", "</summary>")

# While streaming, the last N characters are withheld so that a marker that has
# only partially arrived (the longest, "**Final Answer**", is 16 characters)
# is never displayed before it can be rewritten.
_HOLDBACK_CHARS = 16

# (connect, read) timeouts in seconds; the read timeout applies between
# received chunks, so it bounds a stalled stream rather than the whole answer.
_REQUEST_TIMEOUT = (10, 300)


def _escape_angle_brackets(text):
    """Return *text* with ``<`` and ``>`` replaced by HTML entities."""
    return text.replace("<", "&lt;").replace(">", "&gt;")


def _apply_markup(text):
    """Rewrite the model's think/final-answer markers into display markup."""
    for marker, replacement in _MARKUP_SUBSTITUTIONS:
        text = text.replace(marker, replacement)
    return text


def _render(text):
    """Escape *text* for display, keeping only the whitelisted tags live."""
    rendered = _escape_angle_brackets(text)
    for tag in _ALLOWED_TAGS:
        rendered = rendered.replace(_escape_angle_brackets(tag), tag)
    # Escape asterisks so they are shown literally instead of as emphasis.
    return rendered.replace("*", "\\*")


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from API_URL and yield the text received so far.

    Args:
        message: The latest user message.
        history: Earlier turns as role/content dicts; they are sent unchanged
            between the system message and the new user message.
        system_message: Content of the leading system message.
        max_tokens: Upper bound on generated tokens, sent as ``max_tokens``.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling value forwarded to the backend.

    Yields:
        The formatted response accumulated so far (the whole text each time,
        not a delta). On a non-200 status or any exception a short fixed error
        message is yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1",
        "stream": True,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honour the caller's limit instead of a hard-coded 32768.
        "max_tokens": int(max_tokens),
    }

    try:
        with requests.post(
            API_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                yield "Service temporarily unavailable"
                return

            current_response = ""
            for line in r.iter_lines():
                if not line:
                    continue
                line = line.decode("utf-8")
                if not line.startswith("data: "):
                    continue
                try:
                    json_response = json.loads(line[6:])
                except json.JSONDecodeError:
                    # Non-JSON payload lines are skipped.
                    continue

                choices = (
                    json_response.get("choices")
                    if isinstance(json_response, dict)
                    else None
                )
                if not choices:
                    continue
                content = (choices[0].get("delta") or {}).get("content")
                if not content:
                    continue

                current_response += content
                if len(current_response) > _HOLDBACK_CHARS:
                    # The rewrite is stored back so each marker is replaced once.
                    current_response = _apply_markup(current_response)
                    yield _render(current_response[:-_HOLDBACK_CHARS])

            # Stream finished: flush everything, including the withheld tail.
            if current_response:
                current_response = _apply_markup(current_response)
                yield _render(current_response)
    except Exception as e:
        # UI boundary: log the cause and show a generic message to the user.
        print(f"[ERROR] Request error: {e}")
        yield "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls shown with the chat box. They are listed in the same order as
# the trailing parameters of respond(): system_message, max_tokens,
# temperature, top_p.
system_message_box = gr.Textbox(
    label="System message",
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
)
max_tokens_slider = gr.Slider(
    label="Max new tokens", minimum=1, maximum=32768, step=1, value=16384
)
temperature_slider = gr.Slider(
    label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6
)
top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    step=0.05,
    value=0.95,
)

# Stylesheet: wrap long lines inside rendered code blocks instead of letting
# them overflow, plus spacing/colour rules for accordion elements.
CHAT_CSS = """
.prose pre {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: 100% !important;
}
.prose code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: 100% !important;
}
.prose pre code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: 100% !important;
}
.accordion {
margin: 0 !important;
border: none !important;
}
.accordion-header {
background: #f0f0f0 !important;
padding: 8px !important;
cursor: pointer !important;
}
.accordion-content {
padding: 8px !important;
}
"""

# Chat UI backed by respond(); history is exchanged as role/content messages.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
    ],
    type="messages",
    css=CHAT_CSS,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|