Update app/main.py
Browse files- app/main.py +31 -15
app/main.py
CHANGED
@@ -22,6 +22,13 @@ from google.genai import types
|
|
22 |
|
23 |
from google import genai
|
24 |
import math
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
client = None
|
27 |
|
@@ -1824,7 +1831,6 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1824 |
is_nothinking_model = True
|
1825 |
base_model_name = request.model.replace("-nothinking","")
|
1826 |
# Specific check for the flash model requiring budget
|
1827 |
-
# Specific check for the flash model requiring budget
|
1828 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1829 |
error_response = create_openai_error_response(
|
1830 |
400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
|
@@ -1834,41 +1840,51 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1834 |
is_max_thinking_model = True
|
1835 |
base_model_name = request.model.replace("-max","")
|
1836 |
# Specific check for the flash model requiring budget
|
1837 |
-
# Specific check for the flash model requiring budget
|
1838 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1839 |
error_response = create_openai_error_response(
|
1840 |
400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
|
1841 |
)
|
1842 |
return JSONResponse(status_code=400, content=error_response)
|
1843 |
else:
|
1844 |
-
base_model_name = request.model
|
1845 |
|
1846 |
# Create generation config
|
1847 |
generation_config = create_generation_config(request)
|
1848 |
|
1849 |
-
# --- Determine which client to use (Rotation or Fallback) ---
|
1850 |
client_to_use = None
|
1851 |
-
|
1852 |
|
1853 |
-
if
|
|
|
1854 |
try:
|
1855 |
-
|
1856 |
-
|
1857 |
-
print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else len(credential_manager.credentials_files) - 1})") # Log which credential was used
|
1858 |
except Exception as e:
|
1859 |
-
print(f"ERROR: Failed to
|
1860 |
-
client_to_use = None # Ensure
|
1861 |
|
1862 |
-
# If rotation failed or wasn't possible, try the fallback client
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1863 |
if client_to_use is None:
|
1864 |
global client # Access the fallback client initialized at startup
|
1865 |
if client is not None:
|
1866 |
client_to_use = client
|
1867 |
print("INFO: Using fallback Vertex AI client.")
|
1868 |
else:
|
1869 |
-
# Critical error: No rotated AND no fallback client
|
1870 |
error_response = create_openai_error_response(
|
1871 |
-
500, "Vertex AI client not available (Rotation failed and no fallback)", "server_error"
|
1872 |
)
|
1873 |
return JSONResponse(status_code=500, content=error_response)
|
1874 |
# --- Client determined ---
|
@@ -2356,4 +2372,4 @@ def health_check(api_key: str = Depends(get_api_key)):
|
|
2356 |
}
|
2357 |
}
|
2358 |
|
2359 |
-
# Removed /debug/credentials endpoint
|
|
|
22 |
|
23 |
from google import genai
|
24 |
import math
|
25 |
+
VERTEX_EXPRESS_API_KEY_ENV_VAR = "VERTEX_EXPRESS_API_KEY"
|
26 |
+
VERTEX_EXPRESS_MODELS = [
|
27 |
+
"gemini-2.0-flash-001",
|
28 |
+
"gemini-2.0-flash-lite-001",
|
29 |
+
"gemini-2.5-pro-preview-03-25",
|
30 |
+
"gemini-2.5-flash-preview-04-17",
|
31 |
+
]
|
32 |
|
33 |
client = None
|
34 |
|
|
|
1831 |
is_nothinking_model = True
|
1832 |
base_model_name = request.model.replace("-nothinking","")
|
1833 |
# Specific check for the flash model requiring budget
|
|
|
1834 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1835 |
error_response = create_openai_error_response(
|
1836 |
400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
|
|
|
1840 |
is_max_thinking_model = True
|
1841 |
base_model_name = request.model.replace("-max","")
|
1842 |
# Specific check for the flash model requiring budget
|
|
|
1843 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1844 |
error_response = create_openai_error_response(
|
1845 |
400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
|
1846 |
)
|
1847 |
return JSONResponse(status_code=400, content=error_response)
|
1848 |
else:
|
1849 |
+
base_model_name = request.model # This ensures base_model_name is set if no suffix matches
|
1850 |
|
1851 |
# Create generation config
|
1852 |
generation_config = create_generation_config(request)
|
1853 |
|
1854 |
+
# --- Determine which client to use (Express, Rotation, or Fallback) ---
|
1855 |
client_to_use = None
|
1856 |
+
express_api_key = os.environ.get(VERTEX_EXPRESS_API_KEY_ENV_VAR)
|
1857 |
|
1858 |
+
if express_api_key and base_model_name in VERTEX_EXPRESS_MODELS:
|
1859 |
+
print(f"INFO: Attempting to use Vertex Express Mode for model {base_model_name} with API Key.")
|
1860 |
try:
|
1861 |
+
client_to_use = genai.Client(vertexai=True, api_key=express_api_key)
|
1862 |
+
print(f"INFO: Successfully initialized Vertex AI client in Express Mode for model {base_model_name}.")
|
|
|
1863 |
except Exception as e:
|
1864 |
+
print(f"ERROR: Failed to initialize Vertex AI client in Express Mode: {e}. Falling back to other methods.")
|
1865 |
+
client_to_use = None # Ensure client_to_use is None if express mode fails
|
1866 |
|
1867 |
+
if client_to_use is None: # If Express Mode was not used or failed
|
1868 |
+
rotated_credentials, rotated_project_id = credential_manager.get_next_credentials()
|
1869 |
+
if rotated_credentials and rotated_project_id:
|
1870 |
+
try:
|
1871 |
+
# Create a request-specific client using the rotated credentials
|
1872 |
+
client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="us-central1")
|
1873 |
+
print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else credential_manager.get_total_credentials() - 1})") # Log which credential was used
|
1874 |
+
except Exception as e:
|
1875 |
+
print(f"ERROR: Failed to create client from rotated credential: {e}. Will attempt fallback.")
|
1876 |
+
client_to_use = None # Ensure it's None if creation failed
|
1877 |
+
|
1878 |
+
# If Express Mode and credential rotation both failed or were unavailable, try the fallback client
|
1879 |
if client_to_use is None:
|
1880 |
global client # Access the fallback client initialized at startup
|
1881 |
if client is not None:
|
1882 |
client_to_use = client
|
1883 |
print("INFO: Using fallback Vertex AI client.")
|
1884 |
else:
|
1885 |
+
# Critical error: no Express Mode client, no rotated-credential client, and no fallback client
|
1886 |
error_response = create_openai_error_response(
|
1887 |
+
500, "Vertex AI client not available (Express, Rotation failed and no fallback)", "server_error"
|
1888 |
)
|
1889 |
return JSONResponse(status_code=500, content=error_response)
|
1890 |
# --- Client determined ---
|
|
|
2372 |
}
|
2373 |
}
|
2374 |
|
2375 |
+
# Removed /debug/credentials endpoint
|