Update app/main.py
app/main.py (+39 -30)
@@ -1552,39 +1552,48 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
 
     # --- End of specific OpenAI client model handling ---
 
+    # Initialize flags before checking suffixes
+    is_auto_model = False
+    is_grounded_search = False
+    is_encrypted_model = False
+    is_encrypted_full_model = False
+    is_nothinking_model = False
+    is_max_thinking_model = False
+    base_model_name = request.model # Default to the full name
+
+    # Check model type and extract base model name
+    if request.model.endswith("-auto"):
         is_auto_model = True
-        base_model_name = request.model.replace("-encrypt-full", "")
-    elif is_nothinking_model:
-        base_model_name = request.model.replace("-nothinking","")
+        base_model_name = request.model.replace("-auto", "")
+    elif request.model.endswith("-search"):
+        is_grounded_search = True
+        base_model_name = request.model.replace("-search", "")
+    elif request.model.endswith("-encrypt"):
+        is_encrypted_model = True
+        base_model_name = request.model.replace("-encrypt", "")
+    elif request.model.endswith("-encrypt-full"):
+        is_encrypted_full_model = True
+        base_model_name = request.model.replace("-encrypt-full", "")
+    elif request.model.endswith("-nothinking"):
+        is_nothinking_model = True
+        base_model_name = request.model.replace("-nothinking","")
         # Specific check for the flash model requiring budget
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
+    elif request.model.endswith("-max"):
+        is_max_thinking_model = True
+        base_model_name = request.model.replace("-max","")
         # Specific check for the flash model requiring budget
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
     else:
         base_model_name = request.model
 
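For reference, the suffix handling added here can be exercised in isolation. The sketch below is a minimal, hypothetical extraction of the same first-match-wins chain into a pure helper; the names ModelFlags and parse_model_suffix are illustrative and not part of the commit. It substitutes str.removesuffix (Python 3.9+) for the commit's str.replace, since replace would strip every occurrence of the suffix text rather than only the trailing one. Note that the -encrypt branch cannot shadow -encrypt-full: a name ending in "-encrypt-full" does not end in "-encrypt".

from dataclasses import dataclass

# Known suffixes and the flag each one sets, checked first-match-wins
# in the same order as the elif chain in the diff above.
_SUFFIX_FLAGS = [
    ("-auto", "is_auto_model"),
    ("-search", "is_grounded_search"),
    ("-encrypt", "is_encrypted_model"),
    ("-encrypt-full", "is_encrypted_full_model"),
    ("-nothinking", "is_nothinking_model"),
    ("-max", "is_max_thinking_model"),
]

@dataclass
class ModelFlags:
    base_model_name: str
    is_auto_model: bool = False
    is_grounded_search: bool = False
    is_encrypted_model: bool = False
    is_encrypted_full_model: bool = False
    is_nothinking_model: bool = False
    is_max_thinking_model: bool = False

def parse_model_suffix(model: str) -> ModelFlags:
    # Default: full name, no flags set (matches the initialization block).
    flags = ModelFlags(base_model_name=model)
    for suffix, attr in _SUFFIX_FLAGS:
        if model.endswith(suffix):
            setattr(flags, attr, True)
            # removesuffix strips only the trailing occurrence.
            flags.base_model_name = model.removesuffix(suffix)
            break  # first match wins, mirroring the elif chain
    return flags

print(parse_model_suffix("gemini-2.5-flash-preview-04-17-nothinking"))
print(parse_model_suffix("gemini-2.5-pro-encrypt-full"))

Under this hypothetical split, the flash-only guard for -nothinking and -max (the 400 returned via create_openai_error_response and JSONResponse) would stay in the endpoint, since it needs the request context and the response types.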