bibibi12345 committed on
Commit
0527a50
·
1 Parent(s): ef7065d

fixed bugs

Browse files
Files changed (2) hide show
  1. app/api_helpers.py +1 -1
  2. app/routes/chat_api.py +11 -19
app/api_helpers.py CHANGED
@@ -254,7 +254,7 @@ def is_gemini_response_valid(response: Any) -> bool:
254
  if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
255
  return True
256
 
257
- # Check for candidates (both SDK and DirectVertexClient responses)
258
  if hasattr(response, 'candidates') and response.candidates:
259
  for candidate in response.candidates:
260
  # Check for direct text on candidate
 
254
  if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
255
  return True
256
 
257
+ # Check for candidates in the response
258
  if hasattr(response, 'candidates') and response.candidates:
259
  for candidate in response.candidates:
260
  # Check for direct text on candidate
app/routes/chat_api.py CHANGED
@@ -24,7 +24,7 @@ from api_helpers import (
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
- from direct_vertex_client import DirectVertexClient
28
 
29
  router = APIRouter()
30
 
@@ -118,9 +118,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
118
  try:
119
  # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
120
  if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
121
- client_to_use = DirectVertexClient(api_key=key_val)
122
- await client_to_use.discover_project_id()
123
- print(f"INFO: Attempt {attempt+1}/{total_keys} - Using DirectVertexClient for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
 
 
 
 
 
124
  else:
125
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
126
  print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
@@ -185,9 +191,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
185
  try:
186
  # Pass is_auto_attempt=True for auto-mode calls
187
  result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
188
- # Clean up DirectVertexClient session if used
189
- if isinstance(client_to_use, DirectVertexClient):
190
- await client_to_use.close()
191
  return result
192
  except Exception as e_auto:
193
  last_err = e_auto
@@ -196,9 +199,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
196
 
197
  print(f"All auto attempts failed. Last error: {last_err}")
198
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
199
- # Clean up DirectVertexClient session if used
200
- if isinstance(client_to_use, DirectVertexClient):
201
- await client_to_use.close()
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
@@ -245,17 +245,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
- try:
249
- return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
250
- finally:
251
- # Clean up DirectVertexClient session if used
252
- if isinstance(client_to_use, DirectVertexClient):
253
- await client_to_use.close()
254
 
255
  except Exception as e:
256
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
257
  print(error_msg)
258
- # Clean up DirectVertexClient session if it exists
259
- if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
260
- await client_to_use.close()
261
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
 
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
+ from project_id_discovery import discover_project_id
28
 
29
  router = APIRouter()
30
 
 
118
  try:
119
  # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
120
  if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
121
+ project_id = await discover_project_id(key_val)
122
+ base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
123
+ client_to_use = genai.Client(
124
+ vertexai=True,
125
+ api_key=key_val,
126
+ http_options=types.HttpOptions(base_url=base_url)
127
+ )
128
+ client_to_use._api_client._http_options.api_version = None
129
+ print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode with custom base URL for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
130
  else:
131
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
132
  print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
191
  try:
192
  # Pass is_auto_attempt=True for auto-mode calls
193
  result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
 
 
 
194
  return result
195
  except Exception as e_auto:
196
  last_err = e_auto
 
199
 
200
  print(f"All auto attempts failed. Last error: {last_err}")
201
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
 
 
 
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
 
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
+ return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
 
 
 
 
 
249
 
250
  except Exception as e:
251
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
252
  print(error_msg)
 
 
 
253
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))