RendiXD committed
Commit a7066c4 · verified · 1 Parent(s): 2c09475

Create main.py

Files changed (1)
  1. main.py +457 -0
main.py ADDED
@@ -0,0 +1,457 @@
+ from flask import Flask, request, jsonify, make_response, Response
+ import requests
+ import time
+ import uuid
+ import warnings
+ from waitress import serve
+ import json
+ import tiktoken
+ import socket
+ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+ from mistral_common.protocol.instruct.messages import UserMessage
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
+ from pymemcache.client.base import Client
+ from flask_limiter import Limiter
+ from flask_limiter.util import get_remote_address
+ import os
+ import logging
+ from io import BytesIO
+ import coloredlogs
+ import printedcolors
+ import base64
+
+ # Suppress warnings from flask_limiter
+ warnings.filterwarnings("ignore", category=UserWarning, module="flask_limiter.extension")
+
+ # Create a logger object
+ logger = logging.getLogger("1min-relay")
+
+ # Install coloredlogs with desired log level
+ coloredlogs.install(level='DEBUG', logger=logger)
+
+ def check_memcached_connection(host='memcached', port=11211):
+     """Return True if a Memcached instance is reachable at host:port, otherwise False."""
+     try:
+         client = Client((host, port))
+         client.set('test_key', 'test_value')
+         if client.get('test_key') == b'test_value':
+             client.delete('test_key')  # Clean up
+             return True
+         else:
+             return False
+     except Exception:
+         return False
+
+ logger.info('''
+  _ __ __ _ ___ _
+ / | \/ (_)_ _ | _ \___| |__ _ _ _
+ | | |\/| | | ' \| / -_) / _` | || |
+ |_|_| |_|_|_||_|_|_\___|_\__,_|\_, |
+ |__/ ''')
+
+
+ def calculate_token(sentence, model="DEFAULT"):
+     """Calculate the number of tokens in a sentence based on the specified model."""
+     if model.startswith("mistral"):
+         # Use the Mistral tokenizer, defaulting to Mistral Nemo
+         model_name = "open-mistral-nemo"
+         tokenizer = MistralTokenizer.from_model(model_name)
+         tokenized = tokenizer.encode_chat_completion(
+             ChatCompletionRequest(
+                 messages=[
+                     UserMessage(content=sentence),
+                 ],
+                 model=model_name,
+             )
+         )
+         tokens = tokenized.tokens
+         return len(tokens)
+
+     elif model in ["gpt-3.5-turbo", "gpt-4"]:
+         # Use OpenAI's tiktoken for GPT models
+         encoding = tiktoken.encoding_for_model(model)
+         tokens = encoding.encode(sentence)
+         return len(tokens)
+
+     else:
+         # Default to OpenAI's gpt-4 encoding
+         encoding = tiktoken.encoding_for_model("gpt-4")
+         tokens = encoding.encode(sentence)
+         return len(tokens)
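+
+ # Example (illustrative only): calculate_token("Hello there", "gpt-4") returns the token
+ # count from tiktoken's gpt-4 encoding, while any model name starting with "mistral"
+ # is counted with the Mistral tokenizer above.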
+
+ app = Flask(__name__)
+
+ if check_memcached_connection():
+     limiter = Limiter(
+         get_remote_address,
+         app=app,
+         storage_uri="memcached://memcached:11211",  # Connect to the Memcached container created with Docker
+     )
+ else:
+     # Used for rate limiting without Memcached
+     limiter = Limiter(
+         get_remote_address,
+         app=app,
+     )
+     logger.warning("Memcached is not available. Using in-memory storage for rate limiting. This is not recommended.")
+
+
+ ONE_MIN_API_URL = "https://api.1min.ai/api/features"
+ ONE_MIN_CONVERSATION_API_URL = "https://api.1min.ai/api/conversations"
+ ONE_MIN_CONVERSATION_API_STREAMING_URL = "https://api.1min.ai/api/features?isStreaming=true"
+ ONE_MIN_ASSET_URL = "https://api.1min.ai/api/assets"
+
+ # Define the models that are available for use
+ ALL_ONE_MIN_AVAILABLE_MODELS = [
+     "deepseek-chat",
+     "deepseek-reasoner",
+     "o1-preview",
+     "o1-mini",
+     "gpt-4o-mini",
+     "gpt-4o",
+     "gpt-4-turbo",
+     "gpt-4",
+     "gpt-3.5-turbo",
+     "claude-instant-1.2",
+     "claude-2.1",
+     "claude-3-7-sonnet-20250219",
+     "claude-3-5-sonnet-20240620",
+     "claude-3-opus-20240229",
+     "claude-3-sonnet-20240229",
+     "claude-3-haiku-20240307",
+     "gemini-1.0-pro",
+     "gemini-1.5-pro",
+     "gemini-1.5-flash",
+     "mistral-large-latest",
+     "mistral-small-latest",
+     "mistral-nemo",
+     "open-mistral-7b",
+
+     # Replicate
+     "meta/llama-2-70b-chat",
+     "meta/meta-llama-3-70b-instruct",
+     "meta/meta-llama-3.1-405b-instruct",
+     "command"
+ ]
+
+ # Define the models that support vision inputs
+ vision_supported_models = [
+     "gpt-4o",
+     "gpt-4o-mini",
+     "gpt-4-turbo"
+ ]
+
+
+ # Default values
+ SUBSET_OF_ONE_MIN_PERMITTED_MODELS = ["mistral-nemo", "gpt-4o", "deepseek-chat"]
+ PERMIT_MODELS_FROM_SUBSET_ONLY = False
+
+ # Read environment variables
+ one_min_models_env = os.getenv("SUBSET_OF_ONE_MIN_PERMITTED_MODELS")  # e.g. "mistral-nemo,gpt-4o,deepseek-chat"
+ permit_not_in_available_env = os.getenv("PERMIT_MODELS_FROM_SUBSET_ONLY")  # e.g. "True" or "False"
+
+ # Parse or fall back to defaults
+ if one_min_models_env:
+     SUBSET_OF_ONE_MIN_PERMITTED_MODELS = one_min_models_env.split(",")
+
+ if permit_not_in_available_env and permit_not_in_available_env.lower() == "true":
+     PERMIT_MODELS_FROM_SUBSET_ONLY = True
+
+ # Combine the permitted models into a single list
+ AVAILABLE_MODELS = []
+ AVAILABLE_MODELS.extend(SUBSET_OF_ONE_MIN_PERMITTED_MODELS)
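+
+ # Illustrative configuration (assumed example values, not defaults): to restrict the relay
+ # to two models, the shell or container could export:
+ #   SUBSET_OF_ONE_MIN_PERMITTED_MODELS="gpt-4o,mistral-nemo"
+ #   PERMIT_MODELS_FROM_SUBSET_ONLY="True"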
+
+ @app.route('/', methods=['GET', 'POST'])
+ def index():
+     if request.method == 'POST':
+         return ERROR_HANDLER(1212)
+     if request.method == 'GET':
+         internal_ip = socket.gethostbyname(socket.gethostname())
+         return "Congratulations! Your API is working! You can now make requests to the API.\n\nEndpoint: " + internal_ip + ':5001/v1'
+
+ @app.route('/v1/models')
+ @limiter.limit("500 per minute")
+ def models():
+     # Dynamically create the list of models with additional fields
+     models_data = []
+     if not PERMIT_MODELS_FROM_SUBSET_ONLY:
+         one_min_models_data = [
+             {
+                 "id": model_name,
+                 "object": "model",
+                 "owned_by": "1minai",
+                 "created": 1727389042
+             }
+             for model_name in ALL_ONE_MIN_AVAILABLE_MODELS
+         ]
+     else:
+         one_min_models_data = [
+             {"id": model_name, "object": "model", "owned_by": "1minai", "created": 1727389042}
+             for model_name in SUBSET_OF_ONE_MIN_PERMITTED_MODELS
+         ]
+     models_data.extend(one_min_models_data)
+     return jsonify({"data": models_data, "object": "list"})
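+
+ # Example response shape (illustrative): GET /v1/models returns an OpenAI-style list such as
+ #   {"object": "list", "data": [{"id": "gpt-4o", "object": "model", "owned_by": "1minai", "created": 1727389042}, ...]}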
+
+ def ERROR_HANDLER(code, model=None, key=None):
+     # Build errors in the OpenAI-style error structure
+     error_codes = {  # Internal error codes
+         1002: {"message": f"The model {model} does not exist.", "type": "invalid_request_error", "param": None, "code": "model_not_found", "http_code": 400},
+         1020: {"message": f"Incorrect API key provided: {key}. You can find your API key at https://app.1min.ai/api.", "type": "authentication_error", "param": None, "code": "invalid_api_key", "http_code": 401},
+         1021: {"message": "Invalid Authentication", "type": "invalid_request_error", "param": None, "code": None, "http_code": 401},
+         1212: {"message": "Incorrect Endpoint. Please use the /v1/chat/completions endpoint.", "type": "invalid_request_error", "param": None, "code": "model_not_supported", "http_code": 400},
+         1044: {"message": "This model does not support image inputs.", "type": "invalid_request_error", "param": None, "code": "model_not_supported", "http_code": 400},
+         1412: {"message": "No message provided.", "type": "invalid_request_error", "param": "messages", "code": "invalid_request_error", "http_code": 400},
+         1423: {"message": "No content in last message.", "type": "invalid_request_error", "param": "messages", "code": "invalid_request_error", "http_code": 400},
+     }
+     # Strip http_code from the payload; it is only used for the HTTP status returned below
+     error_data = {k: v for k, v in error_codes.get(code, {"message": "Unknown error", "type": "unknown_error", "param": None, "code": None}).items() if k != "http_code"}
+     logger.error(f"An error has occurred while processing the user's request. Error code: {code}")
+     return jsonify({"error": error_data}), error_codes.get(code, {}).get("http_code", 400)
+
+ def format_conversation_history(messages, new_input):
+     """
+     Formats the conversation history into a structured string.
+
+     Args:
+         messages (list): List of message dictionaries from the request
+         new_input (str): The new user input message
+
+     Returns:
+         str: Formatted conversation history
+     """
+     formatted_history = ["Conversation History:\n"]
+
+     for message in messages:
+         role = message.get('role', '').capitalize()
+         content = message.get('content', '')
+
+         # Handle potential list content
+         if isinstance(content, list):
+             content = '\n'.join(item['text'] for item in content if 'text' in item)
+
+         formatted_history.append(f"{role}: {content}")
+
+     # Append the instruction block and new input only if there is existing history
+     if messages:  # Save credits if it is the first message.
+         formatted_history.append("Respond like normal. The conversation history will be automatically updated on the next MESSAGE. DO NOT ADD User: or Assistant: to your output. Just respond like normal.")
+         formatted_history.append("User Message:\n")
+         formatted_history.append(new_input)
+
+     return '\n'.join(formatted_history)
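+
+ # For example (illustrative), with one prior user/assistant exchange and new_input "How are you?",
+ # the returned string looks roughly like:
+ #   Conversation History:
+ #
+ #   User: Hello
+ #   Assistant: Hi!
+ #   Respond like normal. ...
+ #   User Message:
+ #
+ #   How are you?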
+
+
+ @app.route('/v1/chat/completions', methods=['POST', 'OPTIONS'])
+ @limiter.limit("500 per minute")
+ def conversation():
+     if request.method == 'OPTIONS':
+         return handle_options_request()
+
+     auth_header = request.headers.get('Authorization')
+     if not auth_header or not auth_header.startswith("Bearer "):
+         logger.error("Invalid Authentication")
+         return ERROR_HANDLER(1021)
+
+     api_key = auth_header.split(" ")[1]
+
+     headers = {
+         'API-KEY': api_key
+     }
+
+     request_data = request.json
+
+     all_messages = format_conversation_history(request_data.get('messages', []), request_data.get('new_input', ''))
+
+     messages = request_data.get('messages', [])
+     if not messages:
+         return ERROR_HANDLER(1412)
+
+     user_input = messages[-1].get('content')
+     if not user_input:
+         return ERROR_HANDLER(1423)
+
+     # Check whether user_input is a list (multimodal content) and, if so, combine its text
+     # parts and upload any images to 1min.ai as assets
+     image = False
+     if isinstance(user_input, list):
+         image_paths = []
+         text_parts = []
+         for item in user_input:
+             if 'text' in item:
+                 text_parts.append(item['text'])
+             try:
+                 if 'image_url' in item:
+                     if request_data.get('model', 'mistral-nemo') not in vision_supported_models:
+                         return ERROR_HANDLER(1044, request_data.get('model', 'mistral-nemo'))
+                     if item['image_url']['url'].startswith("data:image/png;base64,"):
+                         base64_image = item['image_url']['url'].split(",")[1]
+                         binary_data = base64.b64decode(base64_image)
+                     else:
+                         image_response = requests.get(item['image_url']['url'])
+                         image_response.raise_for_status()  # Raise an error for bad responses
+                         binary_data = BytesIO(image_response.content)
+                     files = {
+                         'asset': ("relay" + str(uuid.uuid4()), binary_data, 'image/png')
+                     }
+                     asset = requests.post(ONE_MIN_ASSET_URL, files=files, headers=headers)
+                     asset.raise_for_status()  # Raise an error for bad responses
+                     image_path = asset.json()['fileContent']['path']
+                     image_paths.append(image_path)
+                     image = True
+             except Exception as e:
+                 logger.error("An error occurred while handling image input: " + str(e)[:60])
+
+         user_input = '\n'.join(text_parts)
+
+     prompt_token = calculate_token(str(all_messages))
+     if PERMIT_MODELS_FROM_SUBSET_ONLY and request_data.get('model', 'mistral-nemo') not in AVAILABLE_MODELS:
+         return ERROR_HANDLER(1002, request_data.get('model', 'mistral-nemo'))  # Handle invalid model
+
+     logger.debug(f"Processing {prompt_token} prompt tokens with model {request_data.get('model', 'mistral-nemo')}")
+
+     if not image:
+         payload = {
+             "type": "CHAT_WITH_AI",
+             "model": request_data.get('model', 'mistral-nemo'),
+             "promptObject": {
+                 "prompt": all_messages,
+                 "isMixed": False,
+                 "webSearch": False
+             }
+         }
+     else:
+         payload = {
+             "type": "CHAT_WITH_IMAGE",
+             "model": request_data.get('model', 'mistral-nemo'),
+             "promptObject": {
+                 "prompt": all_messages,
+                 "isMixed": False,
+                 "imageList": image_paths
+             }
+         }
+
+     headers = {"API-KEY": api_key, 'Content-Type': 'application/json'}
+
+     if not request_data.get('stream', False):
+         # Non-streaming response
+         logger.debug("Non-Streaming AI Response")
+         response = requests.post(ONE_MIN_API_URL, json=payload, headers=headers)
+         response.raise_for_status()
+         one_min_response = response.json()
+
+         transformed_response = transform_response(one_min_response, request_data, prompt_token)
+         response = make_response(jsonify(transformed_response))
+         set_response_headers(response)
+
+         return response, 200
+
+     else:
+         # Streaming response
+         logger.debug("Streaming AI Response")
+         response_stream = requests.post(ONE_MIN_CONVERSATION_API_STREAMING_URL, data=json.dumps(payload), headers=headers, stream=True)
+         if response_stream.status_code != 200:
+             if response_stream.status_code == 401:
+                 return ERROR_HANDLER(1020)
+             logger.error(f"An unknown error occurred while processing the user's request. Error code: {response_stream.status_code}")
+             return ERROR_HANDLER(response_stream.status_code)
+         return Response(stream_response(response_stream, request_data, request_data.get('model', 'mistral-nemo'), int(prompt_token)), content_type='text/event-stream')
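+
+ # Illustrative client call (values are examples, not requirements):
+ #   curl http://localhost:5001/v1/chat/completions \
+ #     -H "Authorization: Bearer YOUR_1MIN_API_KEY" \
+ #     -H "Content-Type: application/json" \
+ #     -d '{"model": "mistral-nemo", "messages": [{"role": "user", "content": "Hello"}], "stream": false}'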
+
+ def handle_options_request():
+     response = make_response()
+     response.headers.add('Access-Control-Allow-Origin', '*')
+     response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
+     response.headers.add('Access-Control-Allow-Methods', 'POST, OPTIONS')
+     return response, 204
+
+ def transform_response(one_min_response, request_data, prompt_token):
+     completion_token = calculate_token(one_min_response['aiRecord']["aiRecordDetail"]["resultObject"][0])
+     logger.debug(f"Finished processing Non-Streaming response. Completion tokens: {str(completion_token)}")
+     logger.debug(f"Total tokens: {str(completion_token + prompt_token)}")
+     return {
+         "id": f"chatcmpl-{uuid.uuid4()}",
+         "object": "chat.completion",
+         "created": int(time.time()),
+         "model": request_data.get('model', 'mistral-nemo'),
+         "choices": [
+             {
+                 "index": 0,
+                 "message": {
+                     "role": "assistant",
+                     "content": one_min_response['aiRecord']["aiRecordDetail"]["resultObject"][0],
+                 },
+                 "finish_reason": "stop"
+             }
+         ],
+         "usage": {
+             "prompt_tokens": prompt_token,
+             "completion_tokens": completion_token,
+             "total_tokens": prompt_token + completion_token
+         }
+     }
+
+ def set_response_headers(response):
+     response.headers['Content-Type'] = 'application/json'
+     response.headers['Access-Control-Allow-Origin'] = '*'
+     response.headers['X-Request-ID'] = str(uuid.uuid4())
+
+ def stream_response(response, request_data, model, prompt_tokens):
+     all_chunks = ""
+     for chunk in response.iter_content(chunk_size=1024):
+         finish_reason = None
+
+         return_chunk = {
+             "id": f"chatcmpl-{uuid.uuid4()}",
+             "object": "chat.completion.chunk",
+             "created": int(time.time()),
+             "model": request_data.get('model', 'mistral-nemo'),
+             "choices": [
+                 {
+                     "index": 0,
+                     "delta": {
+                         "content": chunk.decode('utf-8')
+                     },
+                     "finish_reason": finish_reason
+                 }
+             ]
+         }
+         all_chunks += chunk.decode('utf-8')
+         yield f"data: {json.dumps(return_chunk)}\n\n"
+
+     tokens = calculate_token(all_chunks)
+     logger.debug(f"Finished processing streaming response. Completion tokens: {str(tokens)}")
+     logger.debug(f"Total tokens: {str(tokens + prompt_tokens)}")
+
+     # Final chunk when iteration stops
+     final_chunk = {
+         "id": f"chatcmpl-{uuid.uuid4()}",
+         "object": "chat.completion.chunk",
+         "created": int(time.time()),
+         "model": request_data.get('model', 'mistral-nemo'),
+         "choices": [
+             {
+                 "index": 0,
+                 "delta": {
+                     "content": ""
+                 },
+                 "finish_reason": "stop"
+             }
+         ],
+         "usage": {
+             "prompt_tokens": prompt_tokens,
+             "completion_tokens": tokens,
+             "total_tokens": tokens + prompt_tokens
+         }
+     }
+     yield f"data: {json.dumps(final_chunk)}\n\n"
+     yield "data: [DONE]\n\n"
+
+ if __name__ == '__main__':
+     internal_ip = socket.gethostbyname(socket.gethostname())
+     response = requests.get('https://api.ipify.org')
+     public_ip = response.text
+     logger.info(f"""{printedcolors.Color.fg.lightcyan}
+ Server is ready to serve at:
+ Internal IP: {internal_ip}:5001
+ Public IP: {public_ip} (only if you've set up port forwarding on your router)
+ Enter this URL into OpenAI clients that support a custom endpoint:
+ {internal_ip}:5001/v1
+ If that does not work, try:
+ {internal_ip}:5001/v1/chat/completions
+ {printedcolors.Color.reset}""")
+     serve(app, host='0.0.0.0', port=5001, threads=6)  # Waitress defaults to 4 threads; 6 allows more concurrent requests.