malt666 committed on
Commit
ad7a735
·
verified ·
1 Parent(s): 502cd20

Delete openai_service.py

Browse files
Files changed (1) hide show
  1. openai_service.py +0 -118
openai_service.py DELETED
@@ -1,118 +0,0 @@
1
- from flask import Flask, request, jsonify
2
- import tiktoken
3
- import os
4
-
5
- app = Flask(__name__)
6
-
7
- # OpenAI模型映射
8
- MODEL_MAPPINGS = {
9
- # GPT-4系列
10
- "gpt-4o": "o200k_base",
11
- "gpt-4-turbo": "cl100k_base",
12
- "gpt-4": "cl100k_base",
13
-
14
- # GPT-3.5系列
15
- "gpt-3.5-turbo": "cl100k_base",
16
- "gpt-35-turbo": "cl100k_base",
17
-
18
- # 旧模型
19
- "text-davinci-003": "p50k_base",
20
- "text-davinci-002": "p50k_base",
21
- "davinci": "r50k_base",
22
-
23
- # 嵌入模型
24
- "text-embedding-ada-002": "cl100k_base",
25
- }
26
-
27
- @app.route('/count_tokens', methods=['POST'])
28
- def count_tokens():
29
- try:
30
- data = request.json
31
- messages = data.get('messages', [])
32
- system = data.get('system')
33
- model = data.get('model', 'gpt-3.5-turbo')
34
-
35
- # 根据模型名称选择合适的编码器
36
- model_key = model.lower()
37
- encoding_name = None
38
-
39
- # 查找完全匹配
40
- if model_key in MODEL_MAPPINGS:
41
- encoding_name = MODEL_MAPPINGS[model_key]
42
- else:
43
- # 查找部分匹配
44
- for key in MODEL_MAPPINGS:
45
- if key in model_key:
46
- encoding_name = MODEL_MAPPINGS[key]
47
- break
48
-
49
- # 如果没有找到匹配,使用默认的cl100k_base编码器
50
- if not encoding_name:
51
- encoding_name = "cl100k_base" # 最常用的编码器
52
-
53
- # 获取编码器
54
- try:
55
- encoding = tiktoken.get_encoding(encoding_name)
56
- except KeyError:
57
- # 如果找不到编码器,使用gpt-3.5-turbo的编码器
58
- encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
59
-
60
- # 计算tokens
61
- total_tokens = 0
62
-
63
- # 按照OpenAI的格式计算tokens
64
- # 参考: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
65
-
66
- # 对于ChatGPT模型,每个请求都有3个隐藏tokens
67
- if encoding_name in ["cl100k_base", "o200k_base"]:
68
- # 每条消息开头有3个token,结尾有1个token
69
- total_tokens += 3 # 每个请求的起始tokens
70
-
71
- # 计算每条消息的tokens
72
- for message in messages:
73
- total_tokens += 4 # 每条消息增加4个token (包括角色)
74
-
75
- for key, value in message.items():
76
- total_tokens += len(encoding.encode(value))
77
-
78
- # 名称字段比较少见,但也计入
79
- if key == "name":
80
- total_tokens -= 1 # 角色名称单独token计算减免
81
-
82
- # 计算system消息的token
83
- if system:
84
- total_tokens += 4 # system消息也视为一条消息
85
- total_tokens += len(encoding.encode(system))
86
- else:
87
- # 对于旧模型,只计算文本的token数量
88
- all_text = ""
89
- if system:
90
- all_text += system + "\n\n"
91
-
92
- for message in messages:
93
- role = message.get('role', '')
94
- content = message.get('content', '')
95
- all_text += f"{role}: {content}\n"
96
-
97
- total_tokens = len(encoding.encode(all_text))
98
-
99
- return jsonify({
100
- 'input_tokens': total_tokens,
101
- 'model': model,
102
- 'encoding': encoding_name
103
- })
104
- except Exception as e:
105
- return jsonify({
106
- 'error': str(e)
107
- }), 400
108
-
109
- @app.route('/health', methods=['GET'])
110
- def health():
111
- return jsonify({
112
- 'status': 'healthy',
113
- 'tokenizer': 'openai-tiktoken',
114
- 'supported_models': list(MODEL_MAPPINGS.keys())
115
- })
116
-
117
- if __name__ == '__main__':
118
- app.run(host='127.0.0.1', port=7862)