bibibi12345 committed on
Commit
3a3173c
·
1 Parent(s): 48c8917

fixed openai mode thinking

Browse files
app/api_helpers.py CHANGED
@@ -254,7 +254,7 @@ def is_gemini_response_valid(response: Any) -> bool:
254
  if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
255
  return True
256
 
257
- # Check for candidates in the response
258
  if hasattr(response, 'candidates') and response.candidates:
259
  for candidate in response.candidates:
260
  # Check for direct text on candidate
 
254
  if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
255
  return True
256
 
257
+ # Check for candidates (both SDK and DirectVertexClient responses)
258
  if hasattr(response, 'candidates') and response.candidates:
259
  for candidate in response.candidates:
260
  # Check for direct text on candidate
app/openai_handler.py CHANGED
@@ -71,8 +71,9 @@ class OpenAIDirectHandler:
71
  'google': {
72
  'safety_settings': self.safety_settings,
73
  'thought_tag_marker': VERTEX_REASONING_TAG,
74
- 'thinking_config ': {"include_thoughts": True}
75
-
 
76
  }
77
  }
78
  }
 
71
  'google': {
72
  'safety_settings': self.safety_settings,
73
  'thought_tag_marker': VERTEX_REASONING_TAG,
74
+ "thinking_config": {
75
+ "include_thoughts": True
76
+ }
77
  }
78
  }
79
  }
app/routes/chat_api.py CHANGED
@@ -24,7 +24,7 @@ from api_helpers import (
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
- from project_id_discovery import discover_project_id
28
 
29
  router = APIRouter()
30
 
@@ -118,15 +118,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
118
  try:
119
  # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
120
  if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
121
- project_id = await discover_project_id(key_val)
122
- base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
123
- client_to_use = genai.Client(
124
- vertexai=True,
125
- api_key=key_val,
126
- http_options=types.HttpOptions(base_url=base_url)
127
- )
128
- client_to_use._api_client._http_options.api_version = None
129
- print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode with custom base URL for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
130
  else:
131
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
132
  print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
@@ -191,6 +185,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
191
  try:
192
  # Pass is_auto_attempt=True for auto-mode calls
193
  result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
 
 
 
194
  return result
195
  except Exception as e_auto:
196
  last_err = e_auto
@@ -199,6 +196,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
199
 
200
  print(f"All auto attempts failed. Last error: {last_err}")
201
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
 
 
 
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
@@ -245,9 +245,17 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
- return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
 
 
 
 
 
249
 
250
  except Exception as e:
251
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
252
  print(error_msg)
 
 
 
253
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
 
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
+ from direct_vertex_client import DirectVertexClient
28
 
29
  router = APIRouter()
30
 
 
118
  try:
119
  # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
120
  if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
121
+ client_to_use = DirectVertexClient(api_key=key_val)
122
+ await client_to_use.discover_project_id()
123
+ print(f"INFO: Attempt {attempt+1}/{total_keys} - Using DirectVertexClient for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
 
 
 
 
 
124
  else:
125
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
126
  print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
185
  try:
186
  # Pass is_auto_attempt=True for auto-mode calls
187
  result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
188
+ # Clean up DirectVertexClient session if used
189
+ if isinstance(client_to_use, DirectVertexClient):
190
+ await client_to_use.close()
191
  return result
192
  except Exception as e_auto:
193
  last_err = e_auto
 
196
 
197
  print(f"All auto attempts failed. Last error: {last_err}")
198
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
199
+ # Clean up DirectVertexClient session if used
200
+ if isinstance(client_to_use, DirectVertexClient):
201
+ await client_to_use.close()
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
 
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
+ try:
249
+ return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
250
+ finally:
251
+ # Clean up DirectVertexClient session if used
252
+ if isinstance(client_to_use, DirectVertexClient):
253
+ await client_to_use.close()
254
 
255
  except Exception as e:
256
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
257
  print(error_msg)
258
+ # Clean up DirectVertexClient session if it exists
259
+ if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
260
+ await client_to_use.close()
261
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))