bibibi12345 committed on
Commit
5a10139
·
verified ·
1 Parent(s): 13b9958

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +31 -15
app/main.py CHANGED
@@ -22,6 +22,13 @@ from google.genai import types
22
 
23
  from google import genai
24
  import math
 
 
 
 
 
 
 
25
 
26
  client = None
27
 
@@ -1824,7 +1831,6 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1824
  is_nothinking_model = True
1825
  base_model_name = request.model.replace("-nothinking","")
1826
  # Specific check for the flash model requiring budget
1827
- # Specific check for the flash model requiring budget
1828
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1829
  error_response = create_openai_error_response(
1830
  400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
@@ -1834,41 +1840,51 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1834
  is_max_thinking_model = True
1835
  base_model_name = request.model.replace("-max","")
1836
  # Specific check for the flash model requiring budget
1837
- # Specific check for the flash model requiring budget
1838
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1839
  error_response = create_openai_error_response(
1840
  400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
1841
  )
1842
  return JSONResponse(status_code=400, content=error_response)
1843
  else:
1844
- base_model_name = request.model
1845
 
1846
  # Create generation config
1847
  generation_config = create_generation_config(request)
1848
 
1849
- # --- Determine which client to use (Rotation or Fallback) ---
1850
  client_to_use = None
1851
- rotated_credentials, rotated_project_id = credential_manager.get_next_credentials()
1852
 
1853
- if rotated_credentials and rotated_project_id:
 
1854
  try:
1855
- # Create a request-specific client using the rotated credentials
1856
- client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="us-central1")
1857
- print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else len(credential_manager.credentials_files) - 1})") # Log which credential was used
1858
  except Exception as e:
1859
- print(f"ERROR: Failed to create client from rotated credential: {e}. Will attempt fallback.")
1860
- client_to_use = None # Ensure it's None if creation failed
1861
 
1862
- # If rotation failed or wasn't possible, try the fallback client
 
 
 
 
 
 
 
 
 
 
 
1863
  if client_to_use is None:
1864
  global client # Access the fallback client initialized at startup
1865
  if client is not None:
1866
  client_to_use = client
1867
  print("INFO: Using fallback Vertex AI client.")
1868
  else:
1869
- # Critical error: No rotated client AND no fallback client
1870
  error_response = create_openai_error_response(
1871
- 500, "Vertex AI client not available (Rotation failed and no fallback)", "server_error"
1872
  )
1873
  return JSONResponse(status_code=500, content=error_response)
1874
  # --- Client determined ---
@@ -2356,4 +2372,4 @@ def health_check(api_key: str = Depends(get_api_key)):
2356
  }
2357
  }
2358
 
2359
- # Removed /debug/credentials endpoint
 
22
 
23
  from google import genai
24
  import math
25
+ VERTEX_EXPRESS_API_KEY_ENV_VAR = "VERTEX_EXPRESS_API_KEY"
26
+ VERTEX_EXPRESS_MODELS = [
27
+ "gemini-2.0-flash-001",
28
+ "gemini-2.0-flash-lite-001",
29
+ "gemini-2.5-pro-preview-03-25",
30
+ "gemini-2.5-flash-preview-04-17",
31
+ ]
32
 
33
  client = None
34
 
 
1831
  is_nothinking_model = True
1832
  base_model_name = request.model.replace("-nothinking","")
1833
  # Specific check for the flash model requiring budget
 
1834
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1835
  error_response = create_openai_error_response(
1836
  400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
 
1840
  is_max_thinking_model = True
1841
  base_model_name = request.model.replace("-max","")
1842
  # Specific check for the flash model requiring budget
 
1843
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1844
  error_response = create_openai_error_response(
1845
  400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
1846
  )
1847
  return JSONResponse(status_code=400, content=error_response)
1848
  else:
1849
+ base_model_name = request.model # This ensures base_model_name is set if no suffix matches
1850
 
1851
  # Create generation config
1852
  generation_config = create_generation_config(request)
1853
 
1854
+ # --- Determine which client to use (Express, Rotation, or Fallback) ---
1855
  client_to_use = None
1856
+ express_api_key = os.environ.get(VERTEX_EXPRESS_API_KEY_ENV_VAR)
1857
 
1858
+ if express_api_key and base_model_name in VERTEX_EXPRESS_MODELS:
1859
+ print(f"INFO: Attempting to use Vertex Express Mode for model {base_model_name} with API Key.")
1860
  try:
1861
+ client_to_use = genai.Client(vertexai=True, api_key=express_api_key)
1862
+ print(f"INFO: Successfully initialized Vertex AI client in Express Mode for model {base_model_name}.")
 
1863
  except Exception as e:
1864
+ print(f"ERROR: Failed to initialize Vertex AI client in Express Mode: {e}. Falling back to other methods.")
1865
+ client_to_use = None # Ensure client_to_use is None if express mode fails
1866
 
1867
+ if client_to_use is None: # If Express Mode was not used or failed
1868
+ rotated_credentials, rotated_project_id = credential_manager.get_next_credentials()
1869
+ if rotated_credentials and rotated_project_id:
1870
+ try:
1871
+ # Create a request-specific client using the rotated credentials
1872
+ client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="us-central1")
1873
+ print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else credential_manager.get_total_credentials() - 1})") # Log which credential was used
1874
+ except Exception as e:
1875
+ print(f"ERROR: Failed to create client from rotated credential: {e}. Will attempt fallback.")
1876
+ client_to_use = None # Ensure it's None if creation failed
1877
+
1878
+ # If express and rotation failed or weren't possible, try the fallback client
1879
  if client_to_use is None:
1880
  global client # Access the fallback client initialized at startup
1881
  if client is not None:
1882
  client_to_use = client
1883
  print("INFO: Using fallback Vertex AI client.")
1884
  else:
1885
+ # Critical error: No express, rotated, AND no fallback client
1886
  error_response = create_openai_error_response(
1887
+ 500, "Vertex AI client not available (Express, Rotation failed and no fallback)", "server_error"
1888
  )
1889
  return JSONResponse(status_code=500, content=error_response)
1890
  # --- Client determined ---
 
2372
  }
2373
  }
2374
 
2375
+ # Removed /debug/credentials endpoint