Update app/main.py

app/main.py (CHANGED): +79 -27
@@ -579,29 +579,29 @@ Ready for your request."""
         )
     ]

+    # --- Find the index of the single assistant message to encrypt ---
+    target_assistant_index = -1
+    num_messages = len(messages)
+    for i in range(num_messages - 1, -1, -1):  # Iterate backwards
+        if messages[i].role == 'assistant':
+            # Condition 1: Is assistant message - met.
+            # Condition 2: Not the last message overall?
+            is_last_overall = (i == num_messages - 1)
+            if is_last_overall:
+                continue  # Cannot be the target if it's the last message
+
+            # Condition 3: Has a user/system message after it?
+            has_user_system_after = False
+            for k in range(i + 1, num_messages):
+                if messages[k].role in ['user', 'system']:
+                    has_user_system_after = True
+                    break

+            if has_user_system_after:
+                # This is the last assistant message meeting all criteria
+                target_assistant_index = i
+                print(f"DEBUG: Identified target assistant message for encoding at index {target_assistant_index}")
+                break  # Found the target, stop searching

     # --- Create the new message list with specific encoding ---
     new_messages = []
@@ -618,13 +618,13 @@ Ready for your request."""

         if message.role == "user":
             encode_this_message = True
+            print(f"DEBUG: Encoding user message (index {i})")
+        elif message.role == "assistant" and i == target_assistant_index:
+            encode_this_message = True
+            print(f"DEBUG: Encoding target assistant message (index {i})")
         else:
             # Keep system, other assistant, tool messages as is
+            print(f"DEBUG: Passing through message (index {i}, role {message.role}) without encoding")
             new_messages.append(message)
             continue  # Skip encoding logic below for this message

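Taken together, the two hunks above implement one rule: every user message is encoded, and exactly one assistant message is encoded as well — the most recent one that is not the final message and is followed by a user or system message. A minimal standalone sketch of that rule (the `Message` dataclass and `encode` stub are hypothetical stand-ins for the app's real types and encoding step):

from dataclasses import dataclass

@dataclass
class Message:
    role: str
    content: str

def encode(text: str) -> str:
    # Hypothetical stand-in for the app's real encoding step
    return f"<encoded>{text}</encoded>"

def select_target_assistant(messages: list[Message]) -> int:
    # Last assistant message that is not final and has a user/system message after it
    n = len(messages)
    for i in range(n - 1, -1, -1):  # iterate backwards, like the hunk above
        if messages[i].role != "assistant" or i == n - 1:
            continue
        if any(m.role in ("user", "system") for m in messages[i + 1:]):
            return i
    return -1

def encode_messages(messages: list[Message]) -> list[Message]:
    target = select_target_assistant(messages)
    out = []
    for i, m in enumerate(messages):
        if m.role == "user" or (m.role == "assistant" and i == target):
            out.append(Message(m.role, encode(m.content)))
        else:
            out.append(m)  # system, tool, and other assistant messages pass through
    return out

msgs = [
    Message("system", "be terse"),
    Message("user", "hi"),
    Message("assistant", "hello"),  # index 2: followed by a user message -> encoded
    Message("user", "again"),
    Message("assistant", "sure"),   # index 4: last message overall -> untouched
]
assert select_target_assistant(msgs) == 2
assert "<encoded>" in encode_messages(msgs)[2].content

Running this confirms index 2 is selected: the later assistant reply at index 4 is the last message overall, so it passes through untouched.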
@@ -1248,6 +1248,33 @@ async def list_models(api_key: str = Depends(get_api_key)):
             "root": "gemini-2.5-flash-preview-04-17",
             "parent": None,
         },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-encrypt",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-nothinking",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-max",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
         {
             "id": "gemini-1.5-flash-8b",
             "object": "model",
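These entries expose three suffix variants of the same base model through the OpenAI-compatible /v1/models endpoint, all pointing back at gemini-2.5-flash-preview-04-17 via their "root" field. A small sketch of how a client could group what it gets back (the helper is hypothetical; the entry shape mirrors the hunk above):

from collections import defaultdict

def variants_by_root(models: list[dict]) -> dict[str, list[str]]:
    # Group model IDs by their "root" field (hypothetical client-side helper)
    groups = defaultdict(list)
    for m in models:
        groups[m["root"]].append(m["id"])
    return dict(groups)

# Entries shaped like the ones added in the hunk above
listing = [
    {"id": "gemini-2.5-flash-preview-04-17-encrypt",
     "root": "gemini-2.5-flash-preview-04-17"},
    {"id": "gemini-2.5-flash-preview-04-17-nothinking",
     "root": "gemini-2.5-flash-preview-04-17"},
    {"id": "gemini-2.5-flash-preview-04-17-max",
     "root": "gemini-2.5-flash-preview-04-17"},
]
for root, ids in variants_by_root(listing).items():
    print(root, "->", ids)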
@@ -1326,6 +1353,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     is_grounded_search = request.model.endswith("-search")
     is_encrypted_model = request.model.endswith("-encrypt")
     is_encrypted_full_model = request.model.endswith("-encrypt-full")
+    is_nothinking_model = request.model.endswith("-nothinking")
+    is_max_thinking_model = request.model.endswith("-max")

     if is_auto_model:
         base_model_name = request.model.replace("-auto", "")
@@ -1335,6 +1364,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         base_model_name = request.model.replace("-encrypt", "")
     elif is_encrypted_full_model:
         base_model_name = request.model.replace("-encrypt-full", "")
+    elif is_nothinking_model:
+        base_model_name = request.model.replace("-nothinking", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
+    elif is_max_thinking_model:
+        base_model_name = request.model.replace("-max", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
     else:
         base_model_name = request.model

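The two new branches strip the suffix and then hard-reject -nothinking and -max for any base other than gemini-2.5-flash-preview-04-17, since only that model takes a thinking budget here. A condensed sketch of the same resolution as a pure function (names hypothetical); it strips with tail slicing, whereas str.replace as used above would also rewrite the substring if it ever appeared mid-ID — harmless for these names, but worth noting:

FLASH_THINKING_BASE = "gemini-2.5-flash-preview-04-17"
SUFFIXES = ("-encrypt-full", "-encrypt", "-nothinking", "-max", "-auto", "-search")

def resolve_model(model: str) -> tuple[str, str | None]:
    # Returns (base_model_name, error); error is None when the variant is allowed.
    # "-encrypt-full" must be tried before "-encrypt" so the longer suffix wins.
    for suffix in SUFFIXES:
        if model.endswith(suffix):
            base = model[: -len(suffix)]
            if suffix in ("-nothinking", "-max") and base != FLASH_THINKING_BASE:
                return base, f"Model '{model}' does not support {suffix} variant"
            return base, None
    return model, None  # no suffix: the model name is already the base

assert resolve_model("gemini-2.5-flash-preview-04-17-nothinking") == (FLASH_THINKING_BASE, None)
assert resolve_model("gemini-1.5-pro-max")[1] is not None   # rejected: wrong base
assert resolve_model("gemini-1.5-flash-8b") == ("gemini-1.5-flash-8b", None)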
@@ -1585,6 +1630,13 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         ]
         current_config["system_instruction"] = encryption_instructions
         current_prompt_func = create_encrypted_full_gemini_prompt
+    elif is_nothinking_model:
+        print(f"Using no thinking budget for model: {request.model}")
+        current_config["thinking_config"] = {"thinking_budget": 0}
+
+    elif is_max_thinking_model:
+        print(f"Using max thinking budget for model: {request.model}")
+        current_config["thinking_config"] = {"thinking_budget": 24576}


     try:
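Finally, the -nothinking and -max branches set thinking_config["thinking_budget"] to 0 (thinking disabled) or 24576, the top of the flash preview's budget range. Assuming the backend calls Gemini through Google's google-genai SDK (not confirmed by this diff), that dict corresponds to types.ThinkingConfig roughly like this:

# Illustration only: how a {"thinking_config": {"thinking_budget": ...}} entry
# could be materialized with the google-genai SDK, if that is what the app uses.
from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder credential

config = types.GenerateContentConfig(
    # -nothinking sets the budget to 0; the -max branch uses 24576 instead
    thinking_config=types.ThinkingConfig(thinking_budget=0),
)

response = client.models.generate_content(
    model="gemini-2.5-flash-preview-04-17",
    contents="Hello",
    config=config,
)
print(response.text)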