Commit
·
61a24c9
1
Parent(s):
df1784a
added 0605 thinking config support
Browse files
- app/routes/chat_api.py +12 -6
- app/routes/models_api.py +4 -4
- vertexModels.json +3 -1
app/routes/chat_api.py
CHANGED
@@ -87,10 +87,10 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
87 |
elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
|
88 |
|
89 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
90 |
-
if is_nothinking_model and not base_model_name.startswith("gemini-2.5-flash"):
|
91 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
|
92 |
-
if is_max_thinking_model and not base_model_name.startswith("gemini-2.5-flash"):
|
93 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
|
94 |
|
95 |
generation_config = create_generation_config(request)
|
96 |
|
@@ -213,9 +213,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
213 |
generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
|
214 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
215 |
elif is_nothinking_model:
|
216 |
-
|
|
|
|
|
|
|
217 |
elif is_max_thinking_model:
|
218 |
-
|
|
|
|
|
|
|
219 |
|
220 |
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
221 |
# We should use the original 'request.model' for API call if it's a suffixed one,
|
|
|
87 |
elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
|
88 |
|
89 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
90 |
+
if is_nothinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
|
91 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
|
92 |
+
if is_max_thinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
|
93 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
|
94 |
|
95 |
generation_config = create_generation_config(request)
|
96 |
|
|
|
213 |
generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
|
214 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
215 |
elif is_nothinking_model:
|
216 |
+
if base_model_name == "gemini-2.5-pro-preview-06-05":
|
217 |
+
generation_config["thinking_config"] = {"thinking_budget": 128}
|
218 |
+
else:
|
219 |
+
generation_config["thinking_config"] = {"thinking_budget": 0}
|
220 |
elif is_max_thinking_model:
|
221 |
+
if base_model_name == "gemini-2.5-pro-preview-06-05":
|
222 |
+
generation_config["thinking_config"] = {"thinking_budget": 32768}
|
223 |
+
else:
|
224 |
+
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
225 |
|
226 |
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
227 |
# We should use the original 'request.model' for API call if it's a suffixed one,
|
app/routes/models_api.py
CHANGED
@@ -90,10 +90,10 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
90 |
"permission": [], "root": original_model_id, "parent": None
|
91 |
})
|
92 |
|
93 |
-
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
94 |
-
if "gemini-2.5-flash" in original_model_id: # Suffix rules based on original_model_id
|
95 |
-
|
96 |
-
for special_suffix in ["-nothinking", "-max"]:
|
97 |
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
98 |
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
99 |
|
|
|
90 |
"permission": [], "root": original_model_id, "parent": None
|
91 |
})
|
92 |
|
93 |
+
# Apply special suffixes for models starting with "gemini-2.5-flash" or specifically "gemini-2.5-pro-preview-06-05"
|
94 |
+
if "gemini-2.5-flash" in original_model_id or original_model_id == "gemini-2.5-pro-preview-06-05": # Suffix rules based on original_model_id
|
95 |
+
special_thinking_suffixes = ["-nothinking", "-max"]
|
96 |
+
for special_suffix in special_thinking_suffixes:
|
97 |
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
98 |
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
99 |
|
vertexModels.json
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
"gemini-2.5-pro-exp-03-25",
|
4 |
"gemini-2.5-pro-preview-03-25",
|
5 |
"gemini-2.5-pro-preview-05-06",
|
|
|
6 |
"gemini-2.5-flash-preview-05-20",
|
7 |
"gemini-2.5-flash-preview-04-17",
|
8 |
"gemini-2.0-flash-001",
|
@@ -14,6 +15,7 @@
|
|
14 |
"gemini-2.5-pro-preview-03-25",
|
15 |
"gemini-2.5-flash-preview-04-17",
|
16 |
"gemini-2.5-flash-preview-05-20",
|
17 |
-
"gemini-2.5-pro-preview-05-06"
|
|
|
18 |
]
|
19 |
}
|
|
|
3 |
"gemini-2.5-pro-exp-03-25",
|
4 |
"gemini-2.5-pro-preview-03-25",
|
5 |
"gemini-2.5-pro-preview-05-06",
|
6 |
+
"gemini-2.5-pro-preview-06-05",
|
7 |
"gemini-2.5-flash-preview-05-20",
|
8 |
"gemini-2.5-flash-preview-04-17",
|
9 |
"gemini-2.0-flash-001",
|
|
|
15 |
"gemini-2.5-pro-preview-03-25",
|
16 |
"gemini-2.5-flash-preview-04-17",
|
17 |
"gemini-2.5-flash-preview-05-20",
|
18 |
+
"gemini-2.5-pro-preview-05-06",
|
19 |
+
"gemini-2.5-pro-preview-06-05"
|
20 |
]
|
21 |
}
|