Commit: Upload 5 files

Files changed:
- Dockerfile (+13 -60)
- app.py (+54 -26)
- requirements.txt (+0 -0)
- templates/dashboard.html (+4 -4)
Dockerfile
CHANGED

@@ -1,71 +1,24 @@
-
-# TODO: replace this file once the full version's dependency issues are resolved
+FROM python:3.11-slim
 
-#
-
+# Run as root
+USER root
 
-# Install basic dependencies
-RUN apk add --no-cache git build-base ca-certificates
-
-# Set the working directory
 WORKDIR /app
 
-
-RUN
-
-# Pre-download dependencies
-RUN go get github.com/google/generative-ai-go/[email protected]
-RUN go get google.golang.org/[email protected]
-RUN go get github.com/gin-gonic/[email protected]
-RUN go get github.com/go-playground/validator/[email protected]
-RUN go get github.com/gabriel-vasile/[email protected]
-
-# Copy the source code
-COPY main.go .
-
-# Make sure the dependencies are consistent
-RUN go mod tidy
-RUN go mod download
-
-# Build
-RUN CGO_ENABLED=0 GOOS=linux go build -o tokenizer -a -installsuffix cgo -ldflags="-w -s" .
-
-# Stage 2: build the Python environment
-FROM python:3.9-slim
-
-# Install basic dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Set the working directory
-WORKDIR /app
-
-# Copy the Go binary
-COPY --from=go-builder /app/tokenizer .
-
-# Copy the Python service files and the tokenizer files
-COPY deepseek_v3_tokenizer /app/deepseek_v3_tokenizer
-COPY openai_service.py /app/
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
-
-RUN pip install --no-cache-dir flask transformers tiktoken
+COPY . .
 
 # Set environment variables
-ENV
+ENV HOST=0.0.0.0
 ENV PORT=7860
-ENV DEEPSEEK_URL=http://127.0.0.1:7861
-ENV OPENAI_URL=http://127.0.0.1:7862
 
-#
-RUN
-python /app/deepseek_v3_tokenizer/deepseek_service.py & \n\
-python /app/openai_service.py & \n\
-sleep 5\n\
-./tokenizer' > /app/start.sh && chmod +x /app/start.sh
+# Remove sensitive files
+RUN rm -f config.json password.txt
 
-#
-EXPOSE 7860
+# Expose the port (Hugging Face Spaces uses 7860 by default)
+EXPOSE 7860
 
-#
-CMD ["
+# Start command
+CMD ["python", "app.py"]
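The rewrite collapses the old two-stage Go-plus-Python build into a single python:3.11-slim image that only runs app.py; token counting is delegated to an external service (see app.py below) instead of a bundled Go tokenizer binary. A minimal local smoke test of the rebuilt image, as a sketch: the image tag abacus-proxy is hypothetical, and the check only assumes the Flask app answers on port 7860 as configured above.

# Build and run first (hypothetical tag):
#   docker build -t abacus-proxy . && docker run -p 7860:7860 abacus-proxy
import time
import requests

def wait_for_app(url="http://127.0.0.1:7860/", attempts=10, delay=2.0):
    """Poll the container until the Flask app responds; return the HTTP status."""
    for _ in range(attempts):
        try:
            # Any response at all means the app is up; index() normally redirects
            resp = requests.get(url, timeout=5, allow_redirects=False)
            return resp.status_code
        except requests.exceptions.ConnectionError:
            time.sleep(delay)
    raise RuntimeError("app did not come up on port 7860")

if __name__ == "__main__":
    print(wait_for_app())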
app.py
CHANGED

@@ -12,12 +12,13 @@ import jwt
 import os
 import threading
 from datetime import datetime, timedelta
-import tiktoken  # import tiktoken to count tokens
 
 app = Flask(__name__, template_folder='templates')
 app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
 app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
 
+# URL of the tokenizer service
+TOKENIZER_SERVICE_URL = "https://esotlam-tokenizer.hf.space/count_tokens"
 
 API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
 MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"

@@ -679,7 +680,7 @@ def send_message(message, model, think=False):
     trace_id, sentry_trace = generate_trace_id()
 
     # Count input tokens
-    prompt_tokens = num_tokens_from_string(message)
+    prompt_tokens, calculation_method = num_tokens_from_string(message, model)
     completion_buffer = io.StringIO()  # collect all output for token counting
 
     headers = {

@@ -787,8 +788,8 @@ def send_message(message, model, think=False):
         yield "data: [DONE]\n\n"
 
         # After streaming finishes, count tokens and update the statistics
-
-        update_model_stats(model, prompt_tokens,
+        completion_result, _ = num_tokens_from_string(completion_buffer.getvalue(), model)
+        update_model_stats(model, prompt_tokens, completion_result, calculation_method)
 
         # If deleting the previous conversation is enabled, its ID is non-empty, and it differs from the current one
         if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:

@@ -818,7 +819,7 @@ def send_message_non_stream(message, model, think=False):
     trace_id, sentry_trace = generate_trace_id()
 
     # Count input tokens
-    prompt_tokens = num_tokens_from_string(message)
+    prompt_tokens, calculation_method = num_tokens_from_string(message, model)
 
     headers = {
         "accept": "text/event-stream",

@@ -916,8 +917,8 @@ def send_message_non_stream(message, model, think=False):
         response_content = content_buffer.getvalue()
 
         # Count output tokens and update the statistics
-
-        update_model_stats(model, prompt_tokens,
+        completion_result, _ = num_tokens_from_string(think_content + response_content, model)
+        update_model_stats(model, prompt_tokens, completion_result, calculation_method)
 
         # If deleting the previous conversation is enabled, its ID is non-empty, and it differs from the current one
         if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:

@@ -938,8 +939,8 @@ def send_message_non_stream(message, model, think=False):
             }],
             "usage": {
                 "prompt_tokens": prompt_tokens,
-                "completion_tokens":
-                "total_tokens": prompt_tokens +
+                "completion_tokens": completion_result,
+                "total_tokens": prompt_tokens + completion_result
             }
         })
     else:

@@ -953,8 +954,8 @@ def send_message_non_stream(message, model, think=False):
         response_content = buffer.getvalue()
 
         # Count output tokens and update the statistics
-
-        update_model_stats(model, prompt_tokens,
+        completion_result, _ = num_tokens_from_string(response_content, model)
+        update_model_stats(model, prompt_tokens, completion_result, calculation_method)
 
         # If deleting the previous conversation is enabled, its ID is non-empty, and it differs from the current one
         if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:

@@ -975,8 +976,8 @@ def send_message_non_stream(message, model, think=False):
             }],
             "usage": {
                 "prompt_tokens": prompt_tokens,
-                "completion_tokens":
-                "total_tokens": prompt_tokens +
+                "completion_tokens": completion_result,
+                "total_tokens": prompt_tokens + completion_result
             }
         })
    except requests.exceptions.RequestException as e:

@@ -1068,22 +1069,49 @@ def index():
     return redirect(url_for('dashboard'))
 
 
-
-
-    """Count the number of tokens in the text"""
+def num_tokens_from_string(string, model=""):
+    """Count the number of tokens in a string"""
     try:
-
-
-
-
+        # Prepare the request payload
+        request_data = {
+            "model": model,
+            "messages": [{"role": "user", "content": string}]
+        }
+
+        # Send a POST request to the token-counting service
+        response = requests.post(
+            TOKENIZER_SERVICE_URL,
+            json=request_data,
+            timeout=10
+        )
+
+        # Parse the response
+        if response.status_code == 200:
+            result = response.json()
+            # A "warning" field in the response marks the count as an estimate
+            calculation_method = "estimate" if "warning" in result else "api"
+            input_tokens = result.get("input_tokens", 0)
+            return input_tokens, calculation_method
+        elif response.status_code == 400:
+            # The service returned 400 but may still provide an estimate
+            result = response.json()
+            if "input_tokens" in result:
+                print(f"Using estimated token count: {result.get('input_tokens')}")
+                return result.get("input_tokens", 0), "estimate"
+            # If no estimate was provided, fall back to character count / 4
+            return len(string) // 4, "estimate"
+        else:
+            # On any other error, log it and fall back to character count / 4
+            print(f"Tokenizer service error: {response.status_code} - {response.text}")
+            return len(string) // 4, "estimate"
     except Exception as e:
-        #
-
-
-
+        # On any other exception, log it and fall back to character count / 4
+        print(f"Token-counting error: {e}")
+        return len(string) // 4, "estimate"
+
 
 # Update model usage statistics
-def update_model_stats(model, prompt_tokens, completion_tokens):
+def update_model_stats(model, prompt_tokens, completion_tokens, calculation_method="estimate"):
     global model_usage_stats, total_tokens, model_usage_records
 
     # Add a call record

@@ -1098,7 +1126,7 @@ def update_model_stats(model, prompt_tokens, completion_tokens):
         "call_time": call_time,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
-        "calculation_method":
+        "calculation_method": calculation_method
     }
     model_usage_records.append(record)
 
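The new token accounting leans entirely on the external tokenizer Space. For reference, a standalone sketch of the request and response shapes num_tokens_from_string relies on; the field names (input_tokens, warning) come from the diff above, while the exact schema of the Space's API is otherwise an assumption.

import requests

# Request shape sent by num_tokens_from_string
payload = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "hello world"}],
}
response = requests.post(
    "https://esotlam-tokenizer.hf.space/count_tokens",
    json=payload,
    timeout=10,
)
# On 200 the app reads {"input_tokens": <int>} and treats a "warning" key as a
# sign the count is an estimate; a 400 body may still carry "input_tokens".
print(response.status_code, response.json())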
requirements.txt
CHANGED

Binary files a/requirements.txt and b/requirements.txt differ
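Git reporting requirements.txt as binary usually means a plain dependency list was saved in a non-UTF-8 encoding, such as UTF-16 with a byte-order mark (common when editing on Windows). That cause is an assumption here, but it is quick to check:

# Inspect the first bytes of requirements.txt for a byte-order mark;
# b'\xff\xfe' or b'\xfe\xff' (UTF-16) is what typically makes git call a text file binary.
with open("requirements.txt", "rb") as f:
    print(f.read(4))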
templates/dashboard.html
CHANGED

@@ -467,7 +467,7 @@
     font-weight: 500;
 }
 
-.token-method.
+.token-method.api {
     background: rgba(54, 211, 153, 0.2);
     color: var(--success-color);
     border: 1px solid rgba(54, 211, 153, 0.3);

@@ -879,8 +879,8 @@
                 <td class="token-count">{{ record.prompt_tokens|int }}</td>
                 <td class="token-count">{{ record.completion_tokens|int }}</td>
                 <td>
-                    {% if record.calculation_method
-                    <span class="token-method
+                    {% if record.calculation_method in ["api"] %}
+                    <span class="token-method api">Exact</span>
                     {% else %}
                     <span class="token-method estimate">Estimated</span>
                     {% endif %}

@@ -890,7 +890,7 @@
         </tbody>
     </table>
     <div class="token-note">
-        <small>* Token counting method: <span class="token-method
+        <small>* Token counting method: <span class="token-method api">Exact</span> means the count came from calling the official API; <span class="token-method estimate">Estimated</span> means it was estimated with the gpt-4o model. All statistics are for reference only and do not reflect actual billing.</small>
     </div>
 </div>
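Each row of the table above renders one entry of model_usage_records. A sketch of the record shape that update_model_stats appends, based only on the fields visible in the app.py diff (any other keys, such as the model name, are not shown there, and the values below are illustrative):

record = {
    "call_time": "2025-01-01 12:00:00",  # illustrative value
    "prompt_tokens": 123,
    "completion_tokens": 456,
    "calculation_method": "api",  # "api" renders the Exact badge; anything else renders Estimated
}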