Commit
·
07be514
1
Parent(s):
ad782c0
added dynamic model list
Browse files
- app/config.py +3 -0
- app/main.py +2 -2
- app/requirements.txt +2 -1
- app/routes/chat_api.py +71 -23
- app/routes/models_api.py +81 -44
- app/vertex_ai_init.py +18 -11
app/config.py
CHANGED
@@ -19,4 +19,7 @@ VERTEX_EXPRESS_API_KEY_VAL = os.environ.get("VERTEX_EXPRESS_API_KEY")
|
|
19 |
FAKE_STREAMING_ENABLED = os.environ.get("FAKE_STREAMING", "false").lower() == "true"
|
20 |
FAKE_STREAMING_INTERVAL_SECONDS = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1.0"))
|
21 |
|
|
|
|
|
|
|
22 |
# Validation logic moved to app/auth.py
|
|
|
19 |
FAKE_STREAMING_ENABLED = os.environ.get("FAKE_STREAMING", "false").lower() == "true"
|
20 |
FAKE_STREAMING_INTERVAL_SECONDS = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1.0"))
|
21 |
|
22 |
+
# URL for the remote JSON file containing model lists
|
23 |
+
MODELS_CONFIG_URL = os.environ.get("MODELS_CONFIG_URL", "https://gist.githubusercontent.com/gzzhongqi/e0b684f319437a859bcf5bd6203fd1f6/raw")
|
24 |
+
|
25 |
# Validation logic moved to app/auth.py
|
app/main.py
CHANGED
@@ -35,8 +35,8 @@ app.include_router(chat_api.router)
|
|
35 |
|
36 |
@app.on_event("startup")
|
37 |
async def startup_event():
|
38 |
-
if init_vertex_ai(credential_manager):
|
39 |
-
print("INFO:
|
40 |
else:
|
41 |
print("ERROR: Failed to initialize a fallback Vertex AI client. API will likely fail.")
|
42 |
|
|
|
35 |
|
36 |
@app.on_event("startup")
|
37 |
async def startup_event():
|
38 |
+
if await init_vertex_ai(credential_manager): # Added await
|
39 |
+
print("INFO: Vertex AI credential and model config initialization check completed successfully.")
|
40 |
else:
|
41 |
print("ERROR: Failed to initialize a fallback Vertex AI client. API will likely fail.")
|
42 |
|
app/requirements.txt
CHANGED
@@ -3,4 +3,5 @@ uvicorn==0.27.1
|
|
3 |
google-auth==2.38.0
|
4 |
google-cloud-aiplatform==1.86.0
|
5 |
pydantic==2.6.1
|
6 |
-
google-genai==1.13.0
|
|
|
|
3 |
google-auth==2.38.0
|
4 |
google-cloud-aiplatform==1.86.0
|
5 |
pydantic==2.6.1
|
6 |
+
google-genai==1.13.0
|
7 |
+
httpx>=0.25.0
|
app/routes/chat_api.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import asyncio
|
2 |
import json # Needed for error streaming
|
3 |
-
from fastapi import APIRouter, Depends, Request
|
4 |
from fastapi.responses import JSONResponse, StreamingResponse
|
5 |
from typing import List, Dict, Any
|
6 |
|
@@ -8,12 +8,12 @@ from typing import List, Dict, Any
|
|
8 |
from google.genai import types
|
9 |
from google import genai
|
10 |
|
11 |
-
# Local module imports
|
12 |
from models import OpenAIRequest, OpenAIMessage
|
13 |
from auth import get_api_key
|
14 |
-
# from main import credential_manager # Removed
|
15 |
import config as app_config
|
16 |
-
from
|
17 |
from message_processing import (
|
18 |
create_gemini_prompt,
|
19 |
create_encrypted_gemini_prompt,
|
@@ -27,12 +27,40 @@ from api_helpers import (
|
|
27 |
|
28 |
router = APIRouter()
|
29 |
|
30 |
-
|
31 |
@router.post("/v1/chat/completions")
|
32 |
async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api_key: str = Depends(get_api_key)):
|
33 |
try:
|
34 |
-
# Access credential_manager from app state
|
35 |
credential_manager_instance = fastapi_request.app.state.credential_manager
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
is_auto_model = request.model.endswith("-auto")
|
37 |
is_grounded_search = request.model.endswith("-search")
|
38 |
is_encrypted_model = request.model.endswith("-encrypt")
|
@@ -41,18 +69,28 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
41 |
is_max_thinking_model = request.model.endswith("-max")
|
42 |
base_model_name = request.model
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
elif
|
48 |
-
elif
|
49 |
-
elif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
generation_config = create_generation_config(request)
|
51 |
|
52 |
client_to_use = None
|
53 |
express_api_key_val = app_config.VERTEX_EXPRESS_API_KEY_VAL
|
54 |
-
|
55 |
-
|
|
|
56 |
try:
|
57 |
client_to_use = genai.Client(vertexai=True, api_key=express_api_key_val)
|
58 |
print(f"INFO: Using Vertex Express Mode for model {base_model_name}.")
|
@@ -74,28 +112,28 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
74 |
print("ERROR: No Vertex AI client could be initialized via Express Mode or Rotated Credentials.")
|
75 |
return JSONResponse(status_code=500, content=create_openai_error_response(500, "Vertex AI client not available. Ensure credentials are set up correctly (env var or files).", "server_error"))
|
76 |
|
77 |
-
|
78 |
|
79 |
if is_auto_model:
|
80 |
print(f"Processing auto model: {request.model}")
|
81 |
attempts = [
|
82 |
{"name": "base", "model": base_model_name, "prompt_func": create_gemini_prompt, "config_modifier": lambda c: c},
|
83 |
-
{"name": "encrypt", "model": base_model_name, "prompt_func": create_encrypted_gemini_prompt, "config_modifier": lambda c: {**c, "system_instruction":
|
84 |
{"name": "old_format", "model": base_model_name, "prompt_func": create_encrypted_full_gemini_prompt, "config_modifier": lambda c: c}
|
85 |
]
|
86 |
last_err = None
|
87 |
for attempt in attempts:
|
88 |
-
print(f"Auto-mode attempting: '{attempt['name']}'")
|
89 |
current_gen_config = attempt["config_modifier"](generation_config.copy())
|
90 |
try:
|
91 |
return await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request)
|
92 |
except Exception as e_auto:
|
93 |
last_err = e_auto
|
94 |
-
print(f"Auto-attempt '{attempt['name']}' failed: {e_auto}")
|
95 |
await asyncio.sleep(1)
|
96 |
|
97 |
print(f"All auto attempts failed. Last error: {last_err}")
|
98 |
-
err_msg = f"All auto-mode attempts failed for {request.model}. Last error: {str(last_err)}"
|
99 |
if not request.stream and last_err:
|
100 |
return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
|
101 |
elif request.stream:
|
@@ -106,23 +144,33 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
106 |
return StreamingResponse(final_error_stream(), media_type="text/event-stream")
|
107 |
return JSONResponse(status_code=500, content=create_openai_error_response(500, "All auto-mode attempts failed without specific error.", "server_error"))
|
108 |
|
109 |
-
else:
|
110 |
current_prompt_func = create_gemini_prompt
|
|
|
|
|
|
|
111 |
if is_grounded_search:
|
112 |
search_tool = types.Tool(google_search=types.GoogleSearch())
|
113 |
generation_config["tools"] = [search_tool]
|
114 |
elif is_encrypted_model:
|
115 |
-
generation_config["system_instruction"] =
|
116 |
current_prompt_func = create_encrypted_gemini_prompt
|
117 |
elif is_encrypted_full_model:
|
118 |
-
generation_config["system_instruction"] =
|
119 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
120 |
elif is_nothinking_model:
|
121 |
generation_config["thinking_config"] = {"thinking_budget": 0}
|
122 |
elif is_max_thinking_model:
|
123 |
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
124 |
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
except Exception as e:
|
128 |
error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
|
|
|
1 |
import asyncio
|
2 |
import json # Needed for error streaming
|
3 |
+
from fastapi import APIRouter, Depends, Request
|
4 |
from fastapi.responses import JSONResponse, StreamingResponse
|
5 |
from typing import List, Dict, Any
|
6 |
|
|
|
8 |
from google.genai import types
|
9 |
from google import genai
|
10 |
|
11 |
+
# Local module imports
|
12 |
from models import OpenAIRequest, OpenAIMessage
|
13 |
from auth import get_api_key
|
14 |
+
# from main import credential_manager # Removed to prevent circular import; accessed via request.app.state
|
15 |
import config as app_config
|
16 |
+
from model_loader import get_vertex_models, get_vertex_express_models # Import from model_loader
|
17 |
from message_processing import (
|
18 |
create_gemini_prompt,
|
19 |
create_encrypted_gemini_prompt,
|
|
|
27 |
|
28 |
router = APIRouter()
|
29 |
|
|
|
30 |
@router.post("/v1/chat/completions")
|
31 |
async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api_key: str = Depends(get_api_key)):
|
32 |
try:
|
|
|
33 |
credential_manager_instance = fastapi_request.app.state.credential_manager
|
34 |
+
|
35 |
+
# Dynamically fetch allowed models for validation
|
36 |
+
vertex_model_ids = await get_vertex_models()
|
37 |
+
# Suffixes that can be appended to base models.
|
38 |
+
# The remote model config should ideally be the source of truth for all valid permutations.
|
39 |
+
standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
|
40 |
+
# No longer using special_suffix_map, will use prefix check instead
|
41 |
+
|
42 |
+
all_allowed_model_ids = set(vertex_model_ids) # Start with base models from config
|
43 |
+
for base_id in vertex_model_ids: # Iterate over base models to add suffixed versions
|
44 |
+
# Apply standard suffixes only if not gemini-2.0
|
45 |
+
if not base_id.startswith("gemini-2.0"):
|
46 |
+
for suffix in standard_suffixes:
|
47 |
+
all_allowed_model_ids.add(f"{base_id}{suffix}")
|
48 |
+
|
49 |
+
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
50 |
+
if base_id.startswith("gemini-2.5-flash"):
|
51 |
+
special_flash_suffixes = ["-nothinking", "-max"]
|
52 |
+
for special_suffix in special_flash_suffixes:
|
53 |
+
all_allowed_model_ids.add(f"{base_id}{special_suffix}")
|
54 |
+
|
55 |
+
# Add express models to the allowed list as well.
|
56 |
+
# These should be full names from the remote config.
|
57 |
+
vertex_express_model_ids = await get_vertex_express_models()
|
58 |
+
all_allowed_model_ids.update(vertex_express_model_ids)
|
59 |
+
|
60 |
+
|
61 |
+
if not request.model or request.model not in all_allowed_model_ids:
|
62 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' not found or not supported by this adapter. Valid models are: {sorted(list(all_allowed_model_ids))}", "invalid_request_error"))
|
63 |
+
|
64 |
is_auto_model = request.model.endswith("-auto")
|
65 |
is_grounded_search = request.model.endswith("-search")
|
66 |
is_encrypted_model = request.model.endswith("-encrypt")
|
|
|
69 |
is_max_thinking_model = request.model.endswith("-max")
|
70 |
base_model_name = request.model
|
71 |
|
72 |
+
# Determine base_model_name by stripping known suffixes
|
73 |
+
# This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
|
74 |
+
if is_auto_model: base_model_name = request.model[:-len("-auto")]
|
75 |
+
elif is_grounded_search: base_model_name = request.model[:-len("-search")]
|
76 |
+
elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
|
77 |
+
elif is_encrypted_model: base_model_name = request.model[:-len("-encrypt")]
|
78 |
+
elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
|
79 |
+
elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
|
80 |
+
|
81 |
+
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
82 |
+
if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
|
83 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
|
84 |
+
if is_max_thinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
|
85 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
|
86 |
+
|
87 |
generation_config = create_generation_config(request)
|
88 |
|
89 |
client_to_use = None
|
90 |
express_api_key_val = app_config.VERTEX_EXPRESS_API_KEY_VAL
|
91 |
+
|
92 |
+
# Use dynamically fetched express models list for this check
|
93 |
+
if express_api_key_val and base_model_name in vertex_express_model_ids: # Check against base_model_name
|
94 |
try:
|
95 |
client_to_use = genai.Client(vertexai=True, api_key=express_api_key_val)
|
96 |
print(f"INFO: Using Vertex Express Mode for model {base_model_name}.")
|
|
|
112 |
print("ERROR: No Vertex AI client could be initialized via Express Mode or Rotated Credentials.")
|
113 |
return JSONResponse(status_code=500, content=create_openai_error_response(500, "Vertex AI client not available. Ensure credentials are set up correctly (env var or files).", "server_error"))
|
114 |
|
115 |
+
encryption_instructions_placeholder = ["// Protocol Instructions Placeholder //"] # Actual instructions are in message_processing
|
116 |
|
117 |
if is_auto_model:
|
118 |
print(f"Processing auto model: {request.model}")
|
119 |
attempts = [
|
120 |
{"name": "base", "model": base_model_name, "prompt_func": create_gemini_prompt, "config_modifier": lambda c: c},
|
121 |
+
{"name": "encrypt", "model": base_model_name, "prompt_func": create_encrypted_gemini_prompt, "config_modifier": lambda c: {**c, "system_instruction": encryption_instructions_placeholder}},
|
122 |
{"name": "old_format", "model": base_model_name, "prompt_func": create_encrypted_full_gemini_prompt, "config_modifier": lambda c: c}
|
123 |
]
|
124 |
last_err = None
|
125 |
for attempt in attempts:
|
126 |
+
print(f"Auto-mode attempting: '{attempt['name']}' for model {attempt['model']}")
|
127 |
current_gen_config = attempt["config_modifier"](generation_config.copy())
|
128 |
try:
|
129 |
return await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request)
|
130 |
except Exception as e_auto:
|
131 |
last_err = e_auto
|
132 |
+
print(f"Auto-attempt '{attempt['name']}' for model {attempt['model']} failed: {e_auto}")
|
133 |
await asyncio.sleep(1)
|
134 |
|
135 |
print(f"All auto attempts failed. Last error: {last_err}")
|
136 |
+
err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
|
137 |
if not request.stream and last_err:
|
138 |
return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
|
139 |
elif request.stream:
|
|
|
144 |
return StreamingResponse(final_error_stream(), media_type="text/event-stream")
|
145 |
return JSONResponse(status_code=500, content=create_openai_error_response(500, "All auto-mode attempts failed without specific error.", "server_error"))
|
146 |
|
147 |
+
else: # Not an auto model
|
148 |
current_prompt_func = create_gemini_prompt
|
149 |
+
# Determine the actual model string to call the API with (e.g., "gemini-1.5-pro-search")
|
150 |
+
api_model_string = request.model
|
151 |
+
|
152 |
if is_grounded_search:
|
153 |
search_tool = types.Tool(google_search=types.GoogleSearch())
|
154 |
generation_config["tools"] = [search_tool]
|
155 |
elif is_encrypted_model:
|
156 |
+
generation_config["system_instruction"] = encryption_instructions_placeholder
|
157 |
current_prompt_func = create_encrypted_gemini_prompt
|
158 |
elif is_encrypted_full_model:
|
159 |
+
generation_config["system_instruction"] = encryption_instructions_placeholder
|
160 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
161 |
elif is_nothinking_model:
|
162 |
generation_config["thinking_config"] = {"thinking_budget": 0}
|
163 |
elif is_max_thinking_model:
|
164 |
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
165 |
|
166 |
+
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
167 |
+
# We should use the original 'request.model' for API call if it's a suffixed one,
|
168 |
+
# or 'base_model_name' if it's truly a base model without suffixes.
|
169 |
+
# The current logic uses 'base_model_name' for the API call in the 'else' block.
|
170 |
+
# This means if `request.model` was "gemini-1.5-pro-search", `base_model_name` becomes "gemini-1.5-pro"
|
171 |
+
# but the API call might need the full "gemini-1.5-pro-search".
|
172 |
+
# Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
|
173 |
+
return await execute_gemini_call(client_to_use, api_model_string, current_prompt_func, generation_config, request)
|
174 |
|
175 |
except Exception as e:
|
176 |
error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
|
app/routes/models_api.py
CHANGED
@@ -1,49 +1,86 @@
|
|
1 |
import time
|
2 |
-
from fastapi import APIRouter, Depends
|
3 |
-
|
4 |
-
|
5 |
-
from
|
|
|
|
|
6 |
|
7 |
router = APIRouter()
|
8 |
|
9 |
@router.get("/v1/models")
|
10 |
-
async def list_models(api_key: str = Depends(get_api_key)):
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
#
|
45 |
-
for
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import time
|
2 |
+
from fastapi import APIRouter, Depends, Request # Added Request
|
3 |
+
from typing import List, Dict, Any
|
4 |
+
from auth import get_api_key
|
5 |
+
from model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
|
6 |
+
import config as app_config # Import config
|
7 |
+
from credentials_manager import CredentialManager # To check its type
|
8 |
|
9 |
router = APIRouter()
|
10 |
|
11 |
@router.get("/v1/models")
|
12 |
+
async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_key)):
|
13 |
+
await refresh_models_config_cache()
|
14 |
+
|
15 |
+
# Access credential_manager from app state
|
16 |
+
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
17 |
+
|
18 |
+
has_sa_creds = credential_manager_instance.get_total_credentials() > 0
|
19 |
+
has_express_key = bool(app_config.VERTEX_EXPRESS_API_KEY_VAL)
|
20 |
+
|
21 |
+
raw_vertex_models = await get_vertex_models()
|
22 |
+
raw_express_models = await get_vertex_express_models()
|
23 |
+
|
24 |
+
candidate_model_ids = set()
|
25 |
+
|
26 |
+
if has_express_key:
|
27 |
+
candidate_model_ids.update(raw_express_models)
|
28 |
+
# If *only* express key is available, only express models (and their variants) should be listed.
|
29 |
+
# The current `vertex_model_ids` from remote config might contain non-express models.
|
30 |
+
# The `get_vertex_express_models()` should be the source of truth for express-eligible base models.
|
31 |
+
if not has_sa_creds:
|
32 |
+
# Only list models that are explicitly in the express list.
|
33 |
+
# Suffix generation will apply only to these if they are not gemini-2.0
|
34 |
+
all_model_ids = set(raw_express_models)
|
35 |
+
else:
|
36 |
+
# Both SA and Express are available, combine all known models
|
37 |
+
all_model_ids = set(raw_vertex_models + raw_express_models)
|
38 |
+
elif has_sa_creds:
|
39 |
+
# Only SA creds available, use all vertex_models (which might include express-eligible ones)
|
40 |
+
all_model_ids = set(raw_vertex_models)
|
41 |
+
else:
|
42 |
+
# No credentials available
|
43 |
+
all_model_ids = set()
|
44 |
+
|
45 |
+
# Create extended model list with variations (search, encrypt, auto etc.)
|
46 |
+
# This logic might need to be more sophisticated based on actual supported features per base model.
|
47 |
+
# For now, let's assume for each base model, we might have these variations.
|
48 |
+
# A better approach would be if the remote config specified these variations.
|
49 |
+
|
50 |
+
dynamic_models_data: List[Dict[str, Any]] = []
|
51 |
+
current_time = int(time.time())
|
52 |
+
|
53 |
+
# Add base models and their variations
|
54 |
+
for model_id in sorted(list(all_model_ids)):
|
55 |
+
dynamic_models_data.append({
|
56 |
+
"id": model_id, "object": "model", "created": current_time, "owned_by": "google",
|
57 |
+
"permission": [], "root": model_id, "parent": None
|
58 |
+
})
|
59 |
+
|
60 |
+
# Conditionally add common variations (standard suffixes)
|
61 |
+
if not model_id.startswith("gemini-2.0"):
|
62 |
+
standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
|
63 |
+
for suffix in standard_suffixes:
|
64 |
+
suffixed_id = f"{model_id}{suffix}"
|
65 |
+
# Check if this suffixed ID is already in all_model_ids (fetched from remote) or already added to dynamic_models_data
|
66 |
+
if suffixed_id not in all_model_ids and not any(m['id'] == suffixed_id for m in dynamic_models_data):
|
67 |
+
dynamic_models_data.append({
|
68 |
+
"id": suffixed_id, "object": "model", "created": current_time, "owned_by": "google",
|
69 |
+
"permission": [], "root": model_id, "parent": None
|
70 |
+
})
|
71 |
+
|
72 |
+
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
73 |
+
if model_id.startswith("gemini-2.5-flash"):
|
74 |
+
special_flash_suffixes = ["-nothinking", "-max"]
|
75 |
+
for special_suffix in special_flash_suffixes:
|
76 |
+
suffixed_id = f"{model_id}{special_suffix}"
|
77 |
+
if suffixed_id not in all_model_ids and not any(m['id'] == suffixed_id for m in dynamic_models_data):
|
78 |
+
dynamic_models_data.append({
|
79 |
+
"id": suffixed_id, "object": "model", "created": current_time, "owned_by": "google",
|
80 |
+
"permission": [], "root": model_id, "parent": None
|
81 |
+
})
|
82 |
+
|
83 |
+
# Ensure uniqueness again after adding suffixes
|
84 |
+
final_models_data_map = {m["id"]: m for m in dynamic_models_data}
|
85 |
+
|
86 |
+
return {"object": "list", "data": list(final_models_data_map.values())}
|
app/vertex_ai_init.py
CHANGED
@@ -1,20 +1,17 @@
|
|
1 |
import json
|
|
|
2 |
from google import genai
|
3 |
-
from credentials_manager import CredentialManager, parse_multiple_json_credentials
|
4 |
-
import config as app_config
|
|
|
5 |
|
6 |
-
#
|
7 |
-
VERTEX_EXPRESS_MODELS
|
8 |
-
|
9 |
-
"gemini-2.0-flash-lite-001",
|
10 |
-
"gemini-2.5-pro-preview-03-25",
|
11 |
-
"gemini-2.5-flash-preview-04-17",
|
12 |
-
"gemini-2.5-pro-preview-05-06",
|
13 |
-
]
|
14 |
|
15 |
# Global 'client' and 'get_vertex_client()' are removed.
|
16 |
|
17 |
-
def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool:
|
18 |
"""
|
19 |
Initializes the credential manager with credentials from GOOGLE_CREDENTIALS_JSON (if provided)
|
20 |
and verifies if any credentials (environment or file-based through the manager) are available.
|
@@ -65,6 +62,16 @@ def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool:
|
|
65 |
else:
|
66 |
print("INFO: GOOGLE_CREDENTIALS_JSON environment variable not found.")
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# CredentialManager's __init__ calls load_credentials_list() for files.
|
69 |
# refresh_credentials_list() re-scans files and combines with in-memory (already includes env creds if loaded above).
|
70 |
# The return value of refresh_credentials_list indicates if total > 0
|
|
|
1 |
import json
|
2 |
+
import asyncio # Added for await
|
3 |
from google import genai
|
4 |
+
from credentials_manager import CredentialManager, parse_multiple_json_credentials
|
5 |
+
import config as app_config
|
6 |
+
from model_loader import refresh_models_config_cache # Import new model loader function
|
7 |
|
8 |
+
# VERTEX_EXPRESS_MODELS list is now dynamically loaded via model_loader
|
9 |
+
# The constant VERTEX_EXPRESS_MODELS previously defined here is removed.
|
10 |
+
# Consumers should use get_vertex_express_models() from model_loader.
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Global 'client' and 'get_vertex_client()' are removed.
|
13 |
|
14 |
+
async def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool: # Made async
|
15 |
"""
|
16 |
Initializes the credential manager with credentials from GOOGLE_CREDENTIALS_JSON (if provided)
|
17 |
and verifies if any credentials (environment or file-based through the manager) are available.
|
|
|
62 |
else:
|
63 |
print("INFO: GOOGLE_CREDENTIALS_JSON environment variable not found.")
|
64 |
|
65 |
+
# Attempt to pre-warm the model configuration cache
|
66 |
+
print("INFO: Attempting to pre-warm model configuration cache during startup...")
|
67 |
+
models_loaded_successfully = await refresh_models_config_cache()
|
68 |
+
if models_loaded_successfully:
|
69 |
+
print("INFO: Model configuration cache pre-warmed successfully.")
|
70 |
+
else:
|
71 |
+
print("WARNING: Failed to pre-warm model configuration cache during startup. It will be loaded lazily on first request.")
|
72 |
+
# We don't necessarily fail the entire init_vertex_ai if model list fetching fails,
|
73 |
+
# as credential validation might still be important, and model list can be fetched later.
|
74 |
+
|
75 |
# CredentialManager's __init__ calls load_credentials_list() for files.
|
76 |
# refresh_credentials_list() re-scans files and combines with in-memory (already includes env creds if loaded above).
|
77 |
# The return value of refresh_credentials_list indicates if total > 0
|