bibibi12345 committed on
Commit
61a24c9
·
1 Parent(s): df1784a

added thinking config support for gemini-2.5-pro-preview-06-05 (0605)

Browse files
app/routes/chat_api.py CHANGED
@@ -87,10 +87,10 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
87
  elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
88
 
89
  # Specific model variant checks (if any remain exclusive and not covered dynamically)
90
- if is_nothinking_model and not base_model_name.startswith("gemini-2.5-flash"):
91
- return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
92
- if is_max_thinking_model and not base_model_name.startswith("gemini-2.5-flash"):
93
- return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
94
 
95
  generation_config = create_generation_config(request)
96
 
@@ -213,9 +213,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
213
  generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
214
  current_prompt_func = create_encrypted_full_gemini_prompt
215
  elif is_nothinking_model:
216
- generation_config["thinking_config"] = {"thinking_budget": 0}
 
 
 
217
  elif is_max_thinking_model:
218
- generation_config["thinking_config"] = {"thinking_budget": 24576}
 
 
 
219
 
220
  # For non-auto models, the 'base_model_name' might have suffix stripped.
221
  # We should use the original 'request.model' for API call if it's a suffixed one,
 
87
  elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
88
 
89
  # Specific model variant checks (if any remain exclusive and not covered dynamically)
90
+ if is_nothinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
91
+ return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
92
+ if is_max_thinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
93
+ return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
94
 
95
  generation_config = create_generation_config(request)
96
 
 
213
  generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
214
  current_prompt_func = create_encrypted_full_gemini_prompt
215
  elif is_nothinking_model:
216
+ if base_model_name == "gemini-2.5-pro-preview-06-05":
217
+ generation_config["thinking_config"] = {"thinking_budget": 128}
218
+ else:
219
+ generation_config["thinking_config"] = {"thinking_budget": 0}
220
  elif is_max_thinking_model:
221
+ if base_model_name == "gemini-2.5-pro-preview-06-05":
222
+ generation_config["thinking_config"] = {"thinking_budget": 32768}
223
+ else:
224
+ generation_config["thinking_config"] = {"thinking_budget": 24576}
225
 
226
  # For non-auto models, the 'base_model_name' might have suffix stripped.
227
  # We should use the original 'request.model' for API call if it's a suffixed one,
app/routes/models_api.py CHANGED
@@ -90,10 +90,10 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
90
  "permission": [], "root": original_model_id, "parent": None
91
  })
92
 
93
- # Apply special suffixes for models starting with "gemini-2.5-flash"
94
- if "gemini-2.5-flash" in original_model_id: # Suffix rules based on original_model_id
95
- special_flash_suffixes = ["-nothinking", "-max"]
96
- for special_suffix in special_flash_suffixes:
97
  suffixed_model_part = f"{original_model_id}{special_suffix}"
98
  final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
99
 
 
90
  "permission": [], "root": original_model_id, "parent": None
91
  })
92
 
93
+ # Apply special suffixes for models starting with "gemini-2.5-flash" or specifically "gemini-2.5-pro-preview-06-05"
94
+ if "gemini-2.5-flash" in original_model_id or original_model_id == "gemini-2.5-pro-preview-06-05": # Suffix rules based on original_model_id
95
+ special_thinking_suffixes = ["-nothinking", "-max"]
96
+ for special_suffix in special_thinking_suffixes:
97
  suffixed_model_part = f"{original_model_id}{special_suffix}"
98
  final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
99
 
vertexModels.json CHANGED
@@ -3,6 +3,7 @@
3
  "gemini-2.5-pro-exp-03-25",
4
  "gemini-2.5-pro-preview-03-25",
5
  "gemini-2.5-pro-preview-05-06",
 
6
  "gemini-2.5-flash-preview-05-20",
7
  "gemini-2.5-flash-preview-04-17",
8
  "gemini-2.0-flash-001",
@@ -14,6 +15,7 @@
14
  "gemini-2.5-pro-preview-03-25",
15
  "gemini-2.5-flash-preview-04-17",
16
  "gemini-2.5-flash-preview-05-20",
17
- "gemini-2.5-pro-preview-05-06"
 
18
  ]
19
  }
 
3
  "gemini-2.5-pro-exp-03-25",
4
  "gemini-2.5-pro-preview-03-25",
5
  "gemini-2.5-pro-preview-05-06",
6
+ "gemini-2.5-pro-preview-06-05",
7
  "gemini-2.5-flash-preview-05-20",
8
  "gemini-2.5-flash-preview-04-17",
9
  "gemini-2.0-flash-001",
 
15
  "gemini-2.5-pro-preview-03-25",
16
  "gemini-2.5-flash-preview-04-17",
17
  "gemini-2.5-flash-preview-05-20",
18
+ "gemini-2.5-pro-preview-05-06",
19
+ "gemini-2.5-pro-preview-06-05"
20
  ]
21
  }