bibibi12345 committed on
Commit
71235a6
·
1 Parent(s): aa79ca3

separated vertex and express models

Browse files
app/model_loader.py CHANGED
@@ -31,9 +31,13 @@ async def fetch_and_parse_models_config() -> Optional[Dict[str, List[str]]]:
31
  "vertex_models" in data and isinstance(data["vertex_models"], list) and \
32
  "vertex_express_models" in data and isinstance(data["vertex_express_models"], list):
33
  print("Successfully fetched and parsed model configuration.")
 
 
 
 
34
  return {
35
  "vertex_models": data["vertex_models"],
36
- "vertex_express_models": data["vertex_express_models"]
37
  }
38
  else:
39
  print(f"ERROR: Fetched model configuration has an invalid structure: {data}")
 
31
  "vertex_models" in data and isinstance(data["vertex_models"], list) and \
32
  "vertex_express_models" in data and isinstance(data["vertex_express_models"], list):
33
  print("Successfully fetched and parsed model configuration.")
34
+
35
+ # Add [EXPRESS] prefix to express models
36
+ prefixed_express_models = [f"[EXPRESS] {model_name}" for model_name in data["vertex_express_models"]]
37
+
38
  return {
39
  "vertex_models": data["vertex_models"],
40
+ "vertex_express_models": prefixed_express_models
41
  }
42
  else:
43
  print(f"ERROR: Fetched model configuration has an invalid structure: {data}")
app/routes/chat_api.py CHANGED
@@ -37,11 +37,12 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
37
  OPENAI_DIRECT_SUFFIX = "-openai"
38
  EXPERIMENTAL_MARKER = "-exp-"
39
  PAY_PREFIX = "[PAY]"
 
40
 
41
  # Model validation based on a predefined list has been removed as per user request.
42
  # The application will now attempt to use any provided model string.
43
  # We still need to fetch vertex_express_model_ids for the Express Mode logic.
44
- vertex_express_model_ids = await get_vertex_express_models()
45
 
46
  # Updated logic for is_openai_direct_model
47
  is_openai_direct_model = False
@@ -57,25 +58,37 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
57
  is_encrypted_full_model = request.model.endswith("-encrypt-full")
58
  is_nothinking_model = request.model.endswith("-nothinking")
59
  is_max_thinking_model = request.model.endswith("-max")
60
- base_model_name = request.model
61
 
62
- # Determine base_model_name by stripping known suffixes
63
- # This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
64
- if is_openai_direct_model:
65
- # The general PAY_PREFIX stripper later will handle if this result starts with [PAY]
66
- base_model_name = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
67
- elif is_auto_model: base_model_name = request.model[:-len("-auto")]
68
- elif is_grounded_search: base_model_name = request.model[:-len("-search")]
69
- elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
70
- elif is_encrypted_model: base_model_name = request.model[:-len("-encrypt")]
71
- elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
72
- elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
73
 
74
- # After all suffix stripping, if PAY_PREFIX is still at the start of base_model_name, remove it.
75
- # This handles cases like "[PAY]model-id-search" correctly.
 
 
 
76
  if base_model_name.startswith(PAY_PREFIX):
77
  base_model_name = base_model_name[len(PAY_PREFIX):]
78
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Specific model variant checks (if any remain exclusive and not covered dynamically)
80
  if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
81
  return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
@@ -86,37 +99,65 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
86
 
87
  client_to_use = None
88
  express_api_keys_list = app_config.VERTEX_EXPRESS_API_KEY_VAL
89
-
90
- # Use dynamically fetched express models list for this check
91
- if express_api_keys_list and base_model_name in vertex_express_model_ids: # Check against base_model_name
 
 
 
 
 
 
 
 
 
 
 
 
92
  indexed_keys = list(enumerate(express_api_keys_list))
93
  random.shuffle(indexed_keys)
94
 
95
  for original_idx, key_val in indexed_keys:
96
  try:
97
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
98
- print(f"INFO: Using Vertex Express Mode for model {base_model_name} with API key (original index: {original_idx}).")
99
  break # Successfully initialized client
100
  except Exception as e:
101
- print(f"WARNING: Vertex Express Mode client init failed for API key (original index: {original_idx}): {e}. Trying next key if available.")
102
- client_to_use = None # Ensure client_to_use is None if this attempt fails
103
-
104
- if client_to_use is None:
105
- print(f"WARNING: All {len(express_api_keys_list)} Vertex Express API key(s) failed to initialize for model {base_model_name}. Falling back.")
106
 
107
- if client_to_use is None:
 
 
 
 
 
 
108
  rotated_credentials, rotated_project_id = credential_manager_instance.get_random_credentials()
 
109
  if rotated_credentials and rotated_project_id:
110
  try:
111
  client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="global")
112
- print(f"INFO: Using rotated credential for project: {rotated_project_id}")
113
  except Exception as e:
114
- print(f"ERROR: Rotated credential client init failed: {e}. Falling back.")
115
- client_to_use = None
116
-
117
- if client_to_use is None:
118
- print("ERROR: No Vertex AI client could be initialized via Express Mode or Rotated Credentials.")
119
- return JSONResponse(status_code=500, content=create_openai_error_response(500, "Vertex AI client not available. Ensure credentials are set up correctly (env var or files).", "server_error"))
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  encryption_instructions_placeholder = ["// Protocol Instructions Placeholder //"] # Actual instructions are in message_processing
122
  if is_openai_direct_model:
 
37
  OPENAI_DIRECT_SUFFIX = "-openai"
38
  EXPERIMENTAL_MARKER = "-exp-"
39
  PAY_PREFIX = "[PAY]"
40
+ EXPRESS_PREFIX = "[EXPRESS] " # Note the space for easier stripping
41
 
42
  # Model validation based on a predefined list has been removed as per user request.
43
  # The application will now attempt to use any provided model string.
44
  # We still need to fetch vertex_express_model_ids for the Express Mode logic.
45
+ # vertex_express_model_ids = await get_vertex_express_models() # We'll use the prefix now
46
 
47
  # Updated logic for is_openai_direct_model
48
  is_openai_direct_model = False
 
58
  is_encrypted_full_model = request.model.endswith("-encrypt-full")
59
  is_nothinking_model = request.model.endswith("-nothinking")
60
  is_max_thinking_model = request.model.endswith("-max")
61
+ base_model_name = request.model # Start with the full model name
62
 
63
+ # Determine base_model_name by stripping known prefixes and suffixes
64
+ # Order of stripping: Prefixes first, then suffixes.
 
 
 
 
 
 
 
 
 
65
 
66
+ is_express_model_request = False
67
+ if base_model_name.startswith(EXPRESS_PREFIX):
68
+ is_express_model_request = True
69
+ base_model_name = base_model_name[len(EXPRESS_PREFIX):]
70
+
71
  if base_model_name.startswith(PAY_PREFIX):
72
  base_model_name = base_model_name[len(PAY_PREFIX):]
73
+
74
+ # Suffix stripping (applied to the name after prefix removal)
75
+ # This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
76
+ if is_openai_direct_model: # This check is based on request.model, so it's fine here
77
+ # If it was an OpenAI direct model, its base name is request.model minus suffix.
78
+ # We need to ensure PAY_PREFIX or EXPRESS_PREFIX are also stripped if they were part of the original.
79
+ temp_base_for_openai = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
80
+ if temp_base_for_openai.startswith(EXPRESS_PREFIX):
81
+ temp_base_for_openai = temp_base_for_openai[len(EXPRESS_PREFIX):]
82
+ if temp_base_for_openai.startswith(PAY_PREFIX):
83
+ temp_base_for_openai = temp_base_for_openai[len(PAY_PREFIX):]
84
+ base_model_name = temp_base_for_openai # Assign the fully stripped name
85
+ elif is_auto_model: base_model_name = base_model_name[:-len("-auto")]
86
+ elif is_grounded_search: base_model_name = base_model_name[:-len("-search")]
87
+ elif is_encrypted_full_model: base_model_name = base_model_name[:-len("-encrypt-full")] # Must be before -encrypt
88
+ elif is_encrypted_model: base_model_name = base_model_name[:-len("-encrypt")]
89
+ elif is_nothinking_model: base_model_name = base_model_name[:-len("-nothinking")]
90
+ elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
91
+
92
  # Specific model variant checks (if any remain exclusive and not covered dynamically)
93
  if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
94
  return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
 
99
 
100
  client_to_use = None
101
  express_api_keys_list = app_config.VERTEX_EXPRESS_API_KEY_VAL
102
+
103
+ # This client initialization logic is for Gemini models.
104
+ # OpenAI Direct models have their own client setup and will return before this.
105
+ if is_openai_direct_model:
106
+ # OpenAI Direct logic is self-contained and will return.
107
+ # If it doesn't return, it means we proceed to Gemini logic, which shouldn't happen
108
+ # if is_openai_direct_model is true. The main if/elif/else for model types handles this.
109
+ pass
110
+ elif is_express_model_request:
111
+ if not express_api_keys_list:
112
+ error_msg = f"Model '{request.model}' is an Express model and requires an Express API key, but none are configured."
113
+ print(f"ERROR: {error_msg}")
114
+ return JSONResponse(status_code=401, content=create_openai_error_response(401, error_msg, "authentication_error"))
115
+
116
+ print(f"INFO: Attempting Vertex Express Mode for model request: {request.model} (base: {base_model_name})")
117
  indexed_keys = list(enumerate(express_api_keys_list))
118
  random.shuffle(indexed_keys)
119
 
120
  for original_idx, key_val in indexed_keys:
121
  try:
122
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
123
+ print(f"INFO: Using Vertex Express Mode for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
124
  break # Successfully initialized client
125
  except Exception as e:
126
+ print(f"WARNING: Vertex Express Mode client init failed for API key (original index: {original_idx}) for model {request.model}: {e}. Trying next key.")
127
+ client_to_use = None # Ensure client_to_use is None for this attempt
 
 
 
128
 
129
+ if client_to_use is None: # All configured Express keys failed
130
+ error_msg = f"All configured Express API keys failed to initialize for model '{request.model}'."
131
+ print(f"ERROR: {error_msg}")
132
+ return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
133
+
134
+ else: # Not an Express model request, therefore an SA credential model request for Gemini
135
+ print(f"INFO: Model '{request.model}' is an SA credential request for Gemini. Attempting SA credentials.")
136
  rotated_credentials, rotated_project_id = credential_manager_instance.get_random_credentials()
137
+
138
  if rotated_credentials and rotated_project_id:
139
  try:
140
  client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="global")
141
+ print(f"INFO: Using SA credential for Gemini model {request.model} (project: {rotated_project_id})")
142
  except Exception as e:
143
+ client_to_use = None # Ensure it's None on failure
144
+ error_msg = f"SA credential client initialization failed for Gemini model '{request.model}': {e}."
145
+ print(f"ERROR: {error_msg}")
146
+ return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
147
+ else: # No SA credentials available for an SA model request
148
+ error_msg = f"Model '{request.model}' requires SA credentials for Gemini, but none are available or loaded."
149
+ print(f"ERROR: {error_msg}")
150
+ return JSONResponse(status_code=401, content=create_openai_error_response(401, error_msg, "authentication_error"))
151
+
152
+ # If we reach here and client_to_use is still None, it means it's an OpenAI Direct Model,
153
+ # which handles its own client and responses.
154
+ # For Gemini models (Express or SA), client_to_use must be set, or an error returned above.
155
+ if not is_openai_direct_model and client_to_use is None:
156
+ # This case should ideally not be reached if the logic above is correct,
157
+ # as each path (Express/SA for Gemini) should either set client_to_use or return an error.
158
+ # This is a safeguard.
159
+ print(f"CRITICAL ERROR: Client for Gemini model '{request.model}' was not initialized, and no specific error was returned. This indicates a logic flaw.")
160
+ return JSONResponse(status_code=500, content=create_openai_error_response(500, "Critical internal server error: Gemini client not initialized.", "server_error"))
161
 
162
  encryption_instructions_placeholder = ["// Protocol Instructions Placeholder //"] # Actual instructions are in message_processing
163
  if is_openai_direct_model:
app/routes/models_api.py CHANGED
@@ -56,7 +56,9 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
56
  # Add base models and their variations
57
  for original_model_id in sorted(list(all_model_ids)):
58
  current_display_prefix = ""
59
- if has_sa_creds and not has_express_key and EXPERIMENTAL_MARKER not in original_model_id:
 
 
60
  current_display_prefix = PAY_PREFIX
61
 
62
  base_display_id = f"{current_display_prefix}{original_model_id}"
 
56
  # Add base models and their variations
57
  for original_model_id in sorted(list(all_model_ids)):
58
  current_display_prefix = ""
59
+ # Only add PAY_PREFIX if the model is not already an EXPRESS model (which has its own prefix)
60
+ if not original_model_id.startswith("[EXPRESS]") and \
61
+ has_sa_creds and not has_express_key and EXPERIMENTAL_MARKER not in original_model_id:
62
  current_display_prefix = PAY_PREFIX
63
 
64
  base_display_id = f"{current_display_prefix}{original_model_id}"