Spaces:

bibibi12345
/

vertex

Building

App Files Community

bibibi12345 committed on May 15

Commit

be547ae

1 Parent(s): da6c071

support multiple express keys

Browse files

Files changed (2) hide show

app/config.py +5 -1
app/routes/chat_api.py +22 -8

app/config.py CHANGED Viewed

@@ -13,7 +13,11 @@ CREDENTIALS_DIR = os.environ.get("CREDENTIALS_DIR", "/app/credentials")
 GOOGLE_CREDENTIALS_JSON_STR = os.environ.get("GOOGLE_CREDENTIALS_JSON")
 # API Key for Vertex Express Mode
-VERTEX_EXPRESS_API_KEY_VAL = os.environ.get("VERTEX_EXPRESS_API_KEY")
 # Fake streaming settings for debugging/testing
 FAKE_STREAMING_ENABLED = os.environ.get("FAKE_STREAMING", "false").lower() == "true"

 GOOGLE_CREDENTIALS_JSON_STR = os.environ.get("GOOGLE_CREDENTIALS_JSON")
 # API Key for Vertex Express Mode
+raw_vertex_keys = os.environ.get("VERTEX_EXPRESS_API_KEY")
+if raw_vertex_keys:
+    VERTEX_EXPRESS_API_KEY_VAL = [key.strip() for key in raw_vertex_keys.split(',') if key.strip()]
+else:
+    VERTEX_EXPRESS_API_KEY_VAL = []
 # Fake streaming settings for debugging/testing
 FAKE_STREAMING_ENABLED = os.environ.get("FAKE_STREAMING", "false").lower() == "true"

app/routes/chat_api.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import json # Needed for error streaming
 from fastapi import APIRouter, Depends, Request
 from fastapi.responses import JSONResponse, StreamingResponse
 from typing import List, Dict, Any
@@ -100,16 +101,29 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
         generation_config = create_generation_config(request)
         client_to_use = None
-        express_api_key_val = app_config.VERTEX_EXPRESS_API_KEY_VAL
         # Use dynamically fetched express models list for this check
-        if express_api_key_val and base_model_name in vertex_express_model_ids: # Check against base_model_name
-            try:
-                client_to_use = genai.Client(vertexai=True, api_key=express_api_key_val)
-                print(f"INFO: Using Vertex Express Mode for model {base_model_name}.")
-            except Exception as e:
-                print(f"ERROR: Vertex Express Mode client init failed: {e}. Falling back.")
-                client_to_use = None
         if client_to_use is None:
             rotated_credentials, rotated_project_id = credential_manager_instance.get_random_credentials()

 import asyncio
 import json # Needed for error streaming
+import random
 from fastapi import APIRouter, Depends, Request
 from fastapi.responses import JSONResponse, StreamingResponse
 from typing import List, Dict, Any
         generation_config = create_generation_config(request)
         client_to_use = None
+        express_api_keys_list = app_config.VERTEX_EXPRESS_API_KEY_VAL
         # Use dynamically fetched express models list for this check
+        if express_api_keys_list and base_model_name in vertex_express_model_ids: # Check against base_model_name
+            indexed_keys = list(enumerate(express_api_keys_list))
+            random.shuffle(indexed_keys)
+            for original_idx, key_val in indexed_keys:
+                try:
+                    client_to_use = genai.Client(vertexai=True, api_key=key_val)
+                    print(f"INFO: Using Vertex Express Mode for model {base_model_name} with API key (original index: {original_idx}).")
+                    break # Successfully initialized client
+                except Exception as e:
+                    print(f"WARNING: Vertex Express Mode client init failed for API key (original index: {original_idx}): {e}. Trying next key if available.")
+                    client_to_use = None # Ensure client_to_use is None if this attempt fails
+            if client_to_use is None:
+                print(f"WARNING: All {len(express_api_keys_list)} Vertex Express API key(s) failed to initialize for model {base_model_name}. Falling back.")
+        # else:
+        #     if not express_api_keys_list:
+        #         print(f"DEBUG: No Vertex Express API keys configured. Skipping Express Mode attempt for model {base_model_name}.")
+        #     elif base_model_name not in vertex_express_model_ids:
+        #         print(f"DEBUG: Model {base_model_name} is not in the Vertex Express model list. Skipping Express Mode attempt.")
         if client_to_use is None:
             rotated_credentials, rotated_project_id = credential_manager_instance.get_random_credentials()