Commit 0527a50
Parent: ef7065d

fixed bugs

Files changed:
- app/api_helpers.py (+1 -1)
- app/routes/chat_api.py (+11 -19)
app/api_helpers.py
@@ -254,7 +254,7 @@ def is_gemini_response_valid(response: Any) -> bool:
     if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
         return True
 
-    # Check for candidates
+    # Check for candidates in the response
     if hasattr(response, 'candidates') and response.candidates:
         for candidate in response.candidates:
             # Check for direct text on candidate
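The api_helpers.py change only rewords a comment, but the context lines show the validator's shape: accept the response if it carries non-empty top-level text, otherwise walk response.candidates. A minimal sketch of that pattern, reconstructed from the context lines here rather than the repo's full implementation (the candidate and part attributes beyond those lines are assumptions based on the google-genai response types):

from typing import Any

def is_gemini_response_valid(response: Any) -> bool:
    # Accept a response with non-empty top-level text.
    if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
        return True

    # Check for candidates in the response
    if hasattr(response, 'candidates') and response.candidates:
        for candidate in response.candidates:
            # Check for direct text on candidate
            if isinstance(getattr(candidate, 'text', None), str) and candidate.text.strip():
                return True
            # Assumed continuation: fall back to text carried on content.parts
            content = getattr(candidate, 'content', None)
            for part in (getattr(content, 'parts', None) or []):
                if isinstance(getattr(part, 'text', None), str) and part.text.strip():
                    return True
    return False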
app/routes/chat_api.py
@@ -24,7 +24,7 @@ from api_helpers import (
     execute_gemini_call,
 )
 from openai_handler import OpenAIDirectHandler
-from
+from project_id_discovery import discover_project_id
 
 router = APIRouter()
 
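Two notes on this hunk. The removed import on old line 27 is truncated in this capture (only "from" survives); the DirectVertexClient removals later in the commit suggest it imported that client, but the module path is not recoverable. The added import pulls discover_project_id from project_id_discovery, a module this commit does not show. Its call site below takes the Express API key and returns the project ID used to build a URL path, so one plausible shape is an async helper that probes the API and caches the result per key. The sketch below is hypothetical throughout: the probe URL, the empty-body POST, and the error-parsing regex are all assumptions, not code from the repo.

# Hypothetical sketch of project_id_discovery.discover_project_id.
import re
import aiohttp

_cache: dict[str, str] = {}  # api_key -> discovered project id

async def discover_project_id(api_key: str) -> str:
    if api_key in _cache:
        return _cache[api_key]
    # Assumption: probing Vertex with a bogus model makes the API echo the
    # project bound to this Express key inside its error payload.
    url = ("https://aiplatform.googleapis.com/v1/publishers/google/models/"
           "nonexistent-model:generateContent")
    async with aiohttp.ClientSession() as session:
        async with session.post(url, params={"key": api_key}, json={}) as resp:
            body = await resp.text()
    match = re.search(r"projects/([^/\s]+)", body)
    if not match:
        raise RuntimeError("could not discover a project id for this API key")
    _cache[api_key] = match.group(1)
    return _cache[api_key]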
@@ -118,9 +118,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             try:
                 # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
                 if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
-
-
-
+                    project_id = await discover_project_id(key_val)
+                    base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
+                    client_to_use = genai.Client(
+                        vertexai=True,
+                        api_key=key_val,
+                        http_options=types.HttpOptions(base_url=base_url)
+                    )
+                    client_to_use._api_client._http_options.api_version = None
+                    print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode with custom base URL for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
                 else:
                     client_to_use = genai.Client(vertexai=True, api_key=key_val)
                     print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
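The replacement branch (new lines 121-129; the content of the three removed lines did not survive in this capture) builds a stock genai.Client pinned to a project-scoped global Vertex endpoint instead of going through a hand-rolled client. Because the base_url already contains the /v1 path segment, resetting the private api_version presumably keeps the SDK from inserting a second version segment into request paths. Pulled out as a standalone sketch (the helper name is ours, and the private-attribute poke is copied from the diff, so it may break across google-genai releases):

from google import genai
from google.genai import types

def make_express_client(api_key: str, project_id: str) -> genai.Client:
    # Route all SDK traffic through the project-scoped global endpoint.
    base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
    client = genai.Client(
        vertexai=True,
        api_key=api_key,
        http_options=types.HttpOptions(base_url=base_url),
    )
    # base_url already carries /v1, so stop the SDK from prefixing its own
    # API version (private attribute, copied from the commit).
    client._api_client._http_options.api_version = None
    return client

# Usage afterwards is the normal SDK surface, e.g.:
#   client.models.generate_content(model="gemini-2.5-pro", contents="hi")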
@@ -185,9 +191,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             try:
                 # Pass is_auto_attempt=True for auto-mode calls
                 result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
-                # Clean up DirectVertexClient session if used
-                if isinstance(client_to_use, DirectVertexClient):
-                    await client_to_use.close()
                 return result
             except Exception as e_auto:
                 last_err = e_auto
@@ -196,9 +199,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
 
         print(f"All auto attempts failed. Last error: {last_err}")
         err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
-        # Clean up DirectVertexClient session if used
-        if isinstance(client_to_use, DirectVertexClient):
-            await client_to_use.close()
         if not request.stream and last_err:
             return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
         elif request.stream:
@@ -245,17 +245,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             # but the API call might need the full "gemini-1.5-pro-search".
             # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
             # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
-            try:
-                return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
-            finally:
-                # Clean up DirectVertexClient session if used
-                if isinstance(client_to_use, DirectVertexClient):
-                    await client_to_use.close()
+            return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
 
         except Exception as e:
             error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
             print(error_msg)
-            # Clean up DirectVertexClient session if it exists
-            if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
-                await client_to_use.close()
             return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
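The remaining deletions are all one pattern: per-request cleanup of the old DirectVertexClient, which evidently managed its own HTTP session and needed an explicit await client_to_use.close() on the success path, the auto-mode failure path, and the outer exception handler. The stock genai.Client owns its transport, so with the custom client gone there is nothing to close per request; those three blocks account for most of the 19 lines removed from this file.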
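End to end, the route still speaks the OpenAI chat-completions dialect, so a quick smoke test of the new gemini-2.5-pro branch can go through any OpenAI-style client. The sketch below assumes a local deployment, a /v1/chat/completions mount, and a proxy key, none of which appear in this diff:

import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",  # assumed mount point
    headers={"Authorization": "Bearer sk-your-proxy-key"},  # assumed auth scheme
    json={
        "model": "gemini-2.5-pro",  # hits the new custom-base-URL branch
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
    timeout=60,
)
print(resp.json())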