bibibi12345 committed on
Commit
3f1c280
·
verified ·
1 Parent(s): 422a61b

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +79 -27
app/main.py CHANGED
@@ -579,29 +579,29 @@ Ready for your request."""
579
  )
580
  ]
581
 
582
- # # --- Find the index of the single assistant message to encrypt ---
583
- # target_assistant_index = -1
584
- # num_messages = len(messages)
585
- # for i in range(num_messages - 1, -1, -1): # Iterate backwards
586
- # if messages[i].role == 'assistant':
587
- # # Condition 1: Is assistant message - met.
588
- # # Condition 2: Not the last message overall?
589
- # is_last_overall = (i == num_messages - 1)
590
- # if is_last_overall:
591
- # continue # Cannot be the target if it's the last message
592
-
593
- # # Condition 3: Has a user/system message after it?
594
- # has_user_system_after = False
595
- # for k in range(i + 1, num_messages):
596
- # if messages[k].role in ['user', 'system']:
597
- # has_user_system_after = True
598
- # break
599
 
600
- # if has_user_system_after:
601
- # # This is the last assistant message meeting all criteria
602
- # target_assistant_index = i
603
- # print(f"DEBUG: Identified target assistant message for encoding at index {target_assistant_index}")
604
- # break # Found the target, stop searching
605
 
606
  # --- Create the new message list with specific encoding ---
607
  new_messages = []
@@ -618,13 +618,13 @@ Ready for your request."""
618
 
619
  if message.role == "user":
620
  encode_this_message = True
621
- # print(f"DEBUG: Encoding user message (index {i})")
622
- # elif message.role == "assistant" and i == target_assistant_index:
623
- # encode_this_message = True
624
- # print(f"DEBUG: Encoding target assistant message (index {i})")
625
  else:
626
  # Keep system, other assistant, tool messages as is
627
- # print(f"DEBUG: Passing through message (index {i}, role {message.role}) without encoding")
628
  new_messages.append(message)
629
  continue # Skip encoding logic below for this message
630
 
@@ -1248,6 +1248,33 @@ async def list_models(api_key: str = Depends(get_api_key)):
1248
  "root": "gemini-2.5-flash-preview-04-17",
1249
  "parent": None,
1250
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1251
  {
1252
  "id": "gemini-1.5-flash-8b",
1253
  "object": "model",
@@ -1326,6 +1353,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1326
  is_grounded_search = request.model.endswith("-search")
1327
  is_encrypted_model = request.model.endswith("-encrypt")
1328
  is_encrypted_full_model = request.model.endswith("-encrypt-full")
 
 
1329
 
1330
  if is_auto_model:
1331
  base_model_name = request.model.replace("-auto", "")
@@ -1335,6 +1364,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1335
  base_model_name = request.model.replace("-encrypt", "")
1336
  elif is_encrypted_full_model:
1337
  base_model_name = request.model.replace("-encrypt-full", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1338
  else:
1339
  base_model_name = request.model
1340
 
@@ -1585,6 +1630,13 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1585
  ]
1586
  current_config["system_instruction"] = encryption_instructions
1587
  current_prompt_func = create_encrypted_full_gemini_prompt
 
 
 
 
 
 
 
1588
 
1589
 
1590
  try:
 
579
  )
580
  ]
581
 
582
+ # --- Find the index of the single assistant message to encrypt ---
583
+ target_assistant_index = -1
584
+ num_messages = len(messages)
585
+ for i in range(num_messages - 1, -1, -1): # Iterate backwards
586
+ if messages[i].role == 'assistant':
587
+ # Condition 1: Is assistant message - met.
588
+ # Condition 2: Not the last message overall?
589
+ is_last_overall = (i == num_messages - 1)
590
+ if is_last_overall:
591
+ continue # Cannot be the target if it's the last message
592
+
593
+ # Condition 3: Has a user/system message after it?
594
+ has_user_system_after = False
595
+ for k in range(i + 1, num_messages):
596
+ if messages[k].role in ['user', 'system']:
597
+ has_user_system_after = True
598
+ break
599
 
600
+ if has_user_system_after:
601
+ # This is the last assistant message meeting all criteria
602
+ target_assistant_index = i
603
+ print(f"DEBUG: Identified target assistant message for encoding at index {target_assistant_index}")
604
+ break # Found the target, stop searching
605
 
606
  # --- Create the new message list with specific encoding ---
607
  new_messages = []
 
618
 
619
  if message.role == "user":
620
  encode_this_message = True
621
+ print(f"DEBUG: Encoding user message (index {i})")
622
+ elif message.role == "assistant" and i == target_assistant_index:
623
+ encode_this_message = True
624
+ print(f"DEBUG: Encoding target assistant message (index {i})")
625
  else:
626
  # Keep system, other assistant, tool messages as is
627
+ print(f"DEBUG: Passing through message (index {i}, role {message.role}) without encoding")
628
  new_messages.append(message)
629
  continue # Skip encoding logic below for this message
630
 
 
1248
  "root": "gemini-2.5-flash-preview-04-17",
1249
  "parent": None,
1250
  },
1251
+ {
1252
+ "id": "gemini-2.5-flash-preview-04-17-encrypt",
1253
+ "object": "model",
1254
+ "created": int(time.time()),
1255
+ "owned_by": "google",
1256
+ "permission": [],
1257
+ "root": "gemini-2.5-flash-preview-04-17",
1258
+ "parent": None,
1259
+ },
1260
+ {
1261
+ "id": "gemini-2.5-flash-preview-04-17-nothinking",
1262
+ "object": "model",
1263
+ "created": int(time.time()),
1264
+ "owned_by": "google",
1265
+ "permission": [],
1266
+ "root": "gemini-2.5-flash-preview-04-17",
1267
+ "parent": None,
1268
+ },
1269
+ {
1270
+ "id": "gemini-2.5-flash-preview-04-17-max",
1271
+ "object": "model",
1272
+ "created": int(time.time()),
1273
+ "owned_by": "google",
1274
+ "permission": [],
1275
+ "root": "gemini-2.5-flash-preview-04-17",
1276
+ "parent": None,
1277
+ },
1278
  {
1279
  "id": "gemini-1.5-flash-8b",
1280
  "object": "model",
 
1353
  is_grounded_search = request.model.endswith("-search")
1354
  is_encrypted_model = request.model.endswith("-encrypt")
1355
  is_encrypted_full_model = request.model.endswith("-encrypt-full")
1356
+ is_nothinking_model = request.model.endswith("-nothinking")
1357
+ is_max_thinking_model = request.model.endswith("-max")
1358
 
1359
  if is_auto_model:
1360
  base_model_name = request.model.replace("-auto", "")
 
1364
  base_model_name = request.model.replace("-encrypt", "")
1365
  elif is_encrypted_full_model:
1366
  base_model_name = request.model.replace("-encrypt-full", "")
1367
+ elif is_nothinking_model:
1368
+ base_model_name = request.model.replace("-nothinking","")
1369
+ # Specific check for the flash model requiring budget
1370
+ if base_model_name != "gemini-2.5-flash-preview-04-17":
1371
+ error_response = create_openai_error_response(
1372
+ 400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
1373
+ )
1374
+ return JSONResponse(status_code=400, content=error_response)
1375
+ elif is_max_thinking_model:
1376
+ base_model_name = request.model.replace("-max","")
1377
+ # Specific check for the flash model requiring budget
1378
+ if base_model_name != "gemini-2.5-flash-preview-04-17":
1379
+ error_response = create_openai_error_response(
1380
+ 400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
1381
+ )
1382
+ return JSONResponse(status_code=400, content=error_response)
1383
  else:
1384
  base_model_name = request.model
1385
 
 
1630
  ]
1631
  current_config["system_instruction"] = encryption_instructions
1632
  current_prompt_func = create_encrypted_full_gemini_prompt
1633
+ elif is_nothinking_model:
1634
+ print(f"Using no thinking budget for model: {request.model}")
1635
+ current_config["thinking_config"] = {"thinking_budget": 0}
1636
+
1637
+ elif is_max_thinking_model:
1638
+ print(f"Using max thinking budget for model: {request.model}")
1639
+ current_config["thinking_config"] = {"thinking_budget": 24576}
1640
 
1641
 
1642
  try: