Update app/main.py

app/main.py (CHANGED): +79 -27
@@ -579,29 +579,29 @@ Ready for your request."""
         )
     ]

+    # --- Find the index of the single assistant message to encrypt ---
+    target_assistant_index = -1
+    num_messages = len(messages)
+    for i in range(num_messages - 1, -1, -1):  # Iterate backwards
+        if messages[i].role == 'assistant':
+            # Condition 1: Is assistant message - met.
+            # Condition 2: Not the last message overall?
+            is_last_overall = (i == num_messages - 1)
+            if is_last_overall:
+                continue  # Cannot be the target if it's the last message
+
+            # Condition 3: Has a user/system message after it?
+            has_user_system_after = False
+            for k in range(i + 1, num_messages):
+                if messages[k].role in ['user', 'system']:
+                    has_user_system_after = True
+                    break

+            if has_user_system_after:
+                # This is the last assistant message meeting all criteria
+                target_assistant_index = i
+                print(f"DEBUG: Identified target assistant message for encoding at index {target_assistant_index}")
+                break  # Found the target, stop searching

     # --- Create the new message list with specific encoding ---
     new_messages = []
@@ -618,13 +618,13 @@ Ready for your request."""

         if message.role == "user":
             encode_this_message = True
+            print(f"DEBUG: Encoding user message (index {i})")
+        elif message.role == "assistant" and i == target_assistant_index:
+            encode_this_message = True
+            print(f"DEBUG: Encoding target assistant message (index {i})")
         else:
             # Keep system, other assistant, tool messages as is
+            print(f"DEBUG: Passing through message (index {i}, role {message.role}) without encoding")
             new_messages.append(message)
             continue  # Skip encoding logic below for this message

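Taken together, the two hunks above implement one rule: every user message is encoded, and exactly one assistant message is encoded as well — the most recent one that is not the final message and is followed by a user or system message. A minimal standalone sketch of that rule (the `Message` dataclass and `encode` stub are hypothetical stand-ins for the app's real types and encoding step):

from dataclasses import dataclass

@dataclass
class Message:
    role: str
    content: str

def encode(text: str) -> str:
    # Hypothetical stand-in for the app's real encoding step
    return f"<encoded>{text}</encoded>"

def select_target_assistant(messages: list[Message]) -> int:
    # Last assistant message that is not final and has a user/system message after it
    n = len(messages)
    for i in range(n - 1, -1, -1):  # iterate backwards, like the hunk above
        if messages[i].role != "assistant" or i == n - 1:
            continue
        if any(m.role in ("user", "system") for m in messages[i + 1:]):
            return i
    return -1

def encode_messages(messages: list[Message]) -> list[Message]:
    target = select_target_assistant(messages)
    out = []
    for i, m in enumerate(messages):
        if m.role == "user" or (m.role == "assistant" and i == target):
            out.append(Message(m.role, encode(m.content)))
        else:
            out.append(m)  # system, tool, and other assistant messages pass through
    return out

msgs = [
    Message("system", "be terse"),
    Message("user", "hi"),
    Message("assistant", "hello"),  # index 2: followed by a user message -> encoded
    Message("user", "again"),
    Message("assistant", "sure"),   # index 4: last message overall -> untouched
]
assert select_target_assistant(msgs) == 2
assert "<encoded>" in encode_messages(msgs)[2].content

Running this confirms index 2 is selected: the later assistant reply at index 4 is the last message overall, so it passes through untouched.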
@@ -1248,6 +1248,33 @@ async def list_models(api_key: str = Depends(get_api_key)):
             "root": "gemini-2.5-flash-preview-04-17",
             "parent": None,
         },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-encrypt",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-nothinking",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-max",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
         {
             "id": "gemini-1.5-flash-8b",
             "object": "model",
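These entries expose three suffix variants of the same base model through the OpenAI-compatible /v1/models endpoint, all pointing back at gemini-2.5-flash-preview-04-17 via their "root" field. A small sketch of how a client could group what it gets back (the helper is hypothetical; the entry shape mirrors the hunk above):

from collections import defaultdict

def variants_by_root(models: list[dict]) -> dict[str, list[str]]:
    # Group model IDs by their "root" field (hypothetical client-side helper)
    groups = defaultdict(list)
    for m in models:
        groups[m["root"]].append(m["id"])
    return dict(groups)

# Entries shaped like the ones added in the hunk above
listing = [
    {"id": "gemini-2.5-flash-preview-04-17-encrypt",
     "root": "gemini-2.5-flash-preview-04-17"},
    {"id": "gemini-2.5-flash-preview-04-17-nothinking",
     "root": "gemini-2.5-flash-preview-04-17"},
    {"id": "gemini-2.5-flash-preview-04-17-max",
     "root": "gemini-2.5-flash-preview-04-17"},
]
for root, ids in variants_by_root(listing).items():
    print(root, "->", ids)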
@@ -1326,6 +1353,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     is_grounded_search = request.model.endswith("-search")
     is_encrypted_model = request.model.endswith("-encrypt")
     is_encrypted_full_model = request.model.endswith("-encrypt-full")
+    is_nothinking_model = request.model.endswith("-nothinking")
+    is_max_thinking_model = request.model.endswith("-max")

     if is_auto_model:
         base_model_name = request.model.replace("-auto", "")
@@ -1335,6 +1364,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         base_model_name = request.model.replace("-encrypt", "")
     elif is_encrypted_full_model:
         base_model_name = request.model.replace("-encrypt-full", "")
+    elif is_nothinking_model:
+        base_model_name = request.model.replace("-nothinking", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
+    elif is_max_thinking_model:
+        base_model_name = request.model.replace("-max", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
     else:
         base_model_name = request.model

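The two new branches strip the suffix and then hard-reject -nothinking and -max for any base other than gemini-2.5-flash-preview-04-17, since only that model takes a thinking budget here. A condensed sketch of the same resolution as a pure function (names hypothetical); it strips with tail slicing, whereas str.replace as used above would also rewrite the substring if it ever appeared mid-ID — harmless for these names, but worth noting:

FLASH_THINKING_BASE = "gemini-2.5-flash-preview-04-17"
SUFFIXES = ("-encrypt-full", "-encrypt", "-nothinking", "-max", "-auto", "-search")

def resolve_model(model: str) -> tuple[str, str | None]:
    # Returns (base_model_name, error); error is None when the variant is allowed.
    # "-encrypt-full" must be tried before "-encrypt" so the longer suffix wins.
    for suffix in SUFFIXES:
        if model.endswith(suffix):
            base = model[: -len(suffix)]
            if suffix in ("-nothinking", "-max") and base != FLASH_THINKING_BASE:
                return base, f"Model '{model}' does not support {suffix} variant"
            return base, None
    return model, None  # no suffix: the model name is already the base

assert resolve_model("gemini-2.5-flash-preview-04-17-nothinking") == (FLASH_THINKING_BASE, None)
assert resolve_model("gemini-1.5-pro-max")[1] is not None   # rejected: wrong base
assert resolve_model("gemini-1.5-flash-8b") == ("gemini-1.5-flash-8b", None)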
@@ -1585,6 +1630,13 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         ]
         current_config["system_instruction"] = encryption_instructions
         current_prompt_func = create_encrypted_full_gemini_prompt
+    elif is_nothinking_model:
+        print(f"Using no thinking budget for model: {request.model}")
+        current_config["thinking_config"] = {"thinking_budget": 0}
+
+    elif is_max_thinking_model:
+        print(f"Using max thinking budget for model: {request.model}")
+        current_config["thinking_config"] = {"thinking_budget": 24576}


     try:
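Finally, the -nothinking and -max branches set thinking_config["thinking_budget"] to 0 (thinking disabled) or 24576, the top of the flash preview's budget range. Assuming the backend calls Gemini through Google's google-genai SDK (not confirmed by this diff), that dict corresponds to types.ThinkingConfig roughly like this:

# Illustration only: how a {"thinking_config": {"thinking_budget": ...}} entry
# could be materialized with the google-genai SDK, if that is what the app uses.
from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder credential

config = types.GenerateContentConfig(
    # -nothinking sets the budget to 0; the -max branch uses 24576 instead
    thinking_config=types.ThinkingConfig(thinking_budget=0),
)

response = client.models.generate_content(
    model="gemini-2.5-flash-preview-04-17",
    contents="Hello",
    config=config,
)
print(response.text)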