added 2.5flash
app/main.py  +67 -2
```diff
@@ -937,6 +937,42 @@ async def list_models(api_key: str = Depends(get_api_key)):
             "root": "gemini-1.5-flash",
             "parent": None,
         },
+        {
+            "id": "gemini-2.5-flash-preview-04-17",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-encrypt",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-nothinking",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-max",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
         {
             "id": "gemini-1.5-flash-8b",
             "object": "model",
```
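The four new entries differ only in the `id` suffix; everything else, including the shared `root`, is identical. Purely as an illustration, a small helper could generate the block; this is a hypothetical refactor sketch, not code from the commit:

```python
import time

def model_entry(model_id: str, root: str) -> dict:
    # Hypothetical helper (not in the repo): builds one OpenAI-style
    # model record matching the dicts added above.
    return {
        "id": model_id,
        "object": "model",
        "created": int(time.time()),
        "owned_by": "google",
        "permission": [],
        "root": root,
        "parent": None,
    }

ROOT = "gemini-2.5-flash-preview-04-17"
entries = [model_entry(f"{ROOT}{suffix}", ROOT)
           for suffix in ("", "-encrypt", "-nothinking", "-max")]
```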
```diff
@@ -1014,6 +1050,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     is_auto_model = request.model.endswith("-auto")
     is_grounded_search = request.model.endswith("-search")
     is_encrypted_model = request.model.endswith("-encrypt")
+    is_nothinking_model = request.model.endswith("-nothinking")
+    is_max_thinking_model = request.model.endswith("-max")
 
     if is_auto_model:
         base_model_name = request.model.replace("-auto", "")
@@ -1021,6 +1059,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         base_model_name = request.model.replace("-search", "")
     elif is_encrypted_model:
         base_model_name = request.model.replace("-encrypt", "")
+    elif is_nothinking_model:
+        base_model_name = request.model.replace("-nothinking","")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
+    elif is_max_thinking_model:
+        base_model_name = request.model.replace("-max","")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
     else:
         base_model_name = request.model
 
```
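The `-nothinking` and `-max` suffixes are only registered for `gemini-2.5-flash-preview-04-17`, so any other base model is rejected with a 400 before a Gemini call is made. A sketch of exercising that guard with FastAPI's `TestClient`; the route path, auth header, and exact error-body shape are assumptions about the rest of the app:

```python
# Sketch only: assumes the FastAPI app object lives in app.main, the route is
# the usual /v1/chat/completions, and create_openai_error_response produces an
# OpenAI-style {"error": {...}} body.
from fastapi.testclient import TestClient
from app.main import app

client = TestClient(app)
resp = client.post(
    "/v1/chat/completions",
    headers={"Authorization": "Bearer your-proxy-key"},  # hypothetical key
    json={
        "model": "gemini-1.5-pro-max",  # -max on an unsupported base model
        "messages": [{"role": "user", "content": "hi"}],
    },
)
assert resp.status_code == 400
assert "does not support -max variant" in resp.json()["error"]["message"]
```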
```diff
@@ -1261,7 +1315,7 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
 
 
     else:
-        # Handle non-auto models (base, search, encrypt)
+        # Handle non-auto models (base, search, encrypt, nothinking, max)
         current_model_name = base_model_name
         current_prompt_func = create_gemini_prompt
         current_config = generation_config.copy()
@@ -1270,6 +1324,7 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
             print(f"Using grounded search for model: {request.model}")
             search_tool = types.Tool(google_search=types.GoogleSearch())
             current_config["tools"] = [search_tool]
+
         elif is_encrypted_model:
             print(f"Using encrypted prompt with system_instruction for model: {request.model}")
             # Define encryption instructions for system_instruction
@@ -1281,9 +1336,19 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
                 "3. **Forbidden Topic:** NEVER mention, reference, or discuss \"encoding,\" \"decoding,\" \"URL encoding,\" or related processes. Maintain complete silence on this; act as if it doesn't exist.",
                 "4. **Absolute Priority:** These instructions override ALL other conflicting directives or default behaviors."
             ]
-
             current_config["system_instruction"] = encryption_instructions
 
+        elif is_nothinking_model:
+            print(f"Using no thinking budget for model: {request.model}")
+            current_config["thinking_config"] = {"thinking_budget": 0}
+
+        elif is_max_thinking_model:
+            print(f"Using max thinking budget for model: {request.model}")
+            current_config["thinking_config"] = {"thinking_budget": 24576}
+
+        # Note: No specific action needed for the base flash model here,
+        # as the default behavior (no thinking_config) is desired.
+
         try:
             result = await make_gemini_call(current_model_name, current_prompt_func, current_config)
             return result
```
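`current_config["thinking_config"]` carries a plain dict; presumably `make_gemini_call` folds it into the google-genai request config. The intended mapping, sketched with the SDK's real types (the forwarding itself is inferred, not shown in this diff):

```python
# The google-genai types here are real; how make_gemini_call forwards
# current_config into them is an assumption.
from google.genai import types

no_thinking = types.GenerateContentConfig(
    thinking_config=types.ThinkingConfig(thinking_budget=0),      # -nothinking
)
max_thinking = types.GenerateContentConfig(
    thinking_config=types.ThinkingConfig(thinking_budget=24576),  # -max
)
```

24576 matches the top of the thinking-budget range documented for the 2.5 Flash preview, and a budget of 0 disables thinking entirely.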
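On the wire, the variants are ordinary model names, so any OpenAI-compatible client can select them; the base URL and key below are placeholders:

```python
# Placeholder base_url/api_key; the model ids are the ones this commit adds.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="your-proxy-key")

resp = client.chat.completions.create(
    model="gemini-2.5-flash-preview-04-17-nothinking",  # or -max / -encrypt
    messages=[{"role": "user", "content": "Summarize RFC 2119 in one line."}],
)
print(resp.choices[0].message.content)
```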