Commit d342ca5
Parent(s): 0527a50

added openai mode for express

Files changed:
- app/model_loader.py +1 -3
- app/openai_handler.py +148 -44
- app/routes/chat_api.py +10 -5
- app/routes/models_api.py +47 -107
app/model_loader.py
CHANGED

@@ -33,11 +33,9 @@ async def fetch_and_parse_models_config() -> Optional[Dict[str, List[str]]]:
         print("Successfully fetched and parsed model configuration.")

         # Add [EXPRESS] prefix to express models
-        prefixed_express_models = [f"[EXPRESS] {model_name}" for model_name in data["vertex_express_models"]]
-
         return {
             "vertex_models": data["vertex_models"],
-            "vertex_express_models": prefixed_express_models
+            "vertex_express_models": data["vertex_express_models"]
         }
     else:
         print(f"ERROR: Fetched model configuration has an invalid structure: {data}")
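
Note: with this change the loader stops prepending "[EXPRESS] " and returns the express model names exactly as they appear in the remote config; display prefixing becomes the responsibility of models_api.py. A minimal sketch of the before/after behaviour, assuming a hypothetical config payload (not from this commit):

# Hypothetical payload parsed from the remote models config:
data = {"vertex_models": ["gemini-2.5-pro"], "vertex_express_models": ["gemini-2.0-flash"]}

# Before this commit the loader returned display-ready names:
#   {"vertex_models": ["gemini-2.5-pro"], "vertex_express_models": ["[EXPRESS] gemini-2.0-flash"]}
# After it, the raw names pass through untouched:
#   {"vertex_models": ["gemini-2.5-pro"], "vertex_express_models": ["gemini-2.0-flash"]}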
app/openai_handler.py
CHANGED

@@ -5,7 +5,8 @@ This module encapsulates all OpenAI-specific logic that was previously in chat_api.py
 import json
 import time
 import asyncio
-
+import httpx
+from typing import Dict, Any, AsyncGenerator, Optional

 from fastapi.responses import JSONResponse, StreamingResponse
 import openai
@@ -21,13 +22,104 @@ from api_helpers import (
 )
 from message_processing import extract_reasoning_by_tags
 from credentials_manager import _refresh_auth
+from project_id_discovery import discover_project_id
+
+
+# Wrapper classes to mimic OpenAI SDK responses for direct httpx calls
+class FakeChatCompletionChunk:
+    """A fake ChatCompletionChunk to wrap the dictionary from a direct API stream."""
+    def __init__(self, data: Dict[str, Any]):
+        self._data = data
+
+    def model_dump(self, exclude_unset=True, exclude_none=True) -> Dict[str, Any]:
+        return self._data
+
+class FakeChatCompletion:
+    """A fake ChatCompletion to wrap the dictionary from a direct non-streaming API call."""
+    def __init__(self, data: Dict[str, Any]):
+        self._data = data
+
+    def model_dump(self, exclude_unset=True, exclude_none=True) -> Dict[str, Any]:
+        return self._data
+
+class ExpressClientWrapper:
+    """
+    A wrapper that mimics the openai.AsyncOpenAI client interface but uses direct
+    httpx calls for Vertex AI Express Mode. This allows it to be used with the
+    existing response handling logic.
+    """
+    def __init__(self, project_id: str, api_key: str, location: str = "global"):
+        self.project_id = project_id
+        self.api_key = api_key
+        self.location = location
+        self.base_url = f"https://aiplatform.googleapis.com/v1beta1/projects/{self.project_id}/locations/{self.location}/endpoints/openapi"
+
+        # The 'chat.completions' structure mimics the real OpenAI client
+        self.chat = self
+        self.completions = self
+
+    async def _stream_generator(self, response: httpx.Response) -> AsyncGenerator[FakeChatCompletionChunk, None]:
+        """Processes the SSE stream from httpx and yields fake chunk objects."""
+        async for line in response.aiter_lines():
+            if line.startswith("data:"):
+                json_str = line[len("data: "):].strip()
+                if json_str == "[DONE]":
+                    break
+                try:
+                    data = json.loads(json_str)
+                    yield FakeChatCompletionChunk(data)
+                except json.JSONDecodeError:
+                    print(f"Warning: Could not decode JSON from stream line: {json_str}")
+                    continue
+
+    async def _streaming_create(self, **kwargs) -> AsyncGenerator[FakeChatCompletionChunk, None]:
+        """Handles the creation of a streaming request using httpx."""
+        endpoint = f"{self.base_url}/chat/completions"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": self.api_key}
+
+        payload = kwargs.copy()
+        if 'extra_body' in payload:
+            payload.update(payload.pop('extra_body'))
+
+        async with httpx.AsyncClient(timeout=300) as client:
+            async with client.stream("POST", endpoint, headers=headers, params=params, json=payload, timeout=None) as response:
+                response.raise_for_status()
+                async for chunk in self._stream_generator(response):
+                    yield chunk
+
+    async def create(self, **kwargs) -> Any:
+        """
+        Mimics the 'create' method of the OpenAI client.
+        It builds and sends a direct HTTP request using httpx, delegating
+        to the appropriate streaming or non-streaming handler.
+        """
+        is_streaming = kwargs.get("stream", False)
+
+        if is_streaming:
+            return self._streaming_create(**kwargs)
+
+        # Non-streaming logic
+        endpoint = f"{self.base_url}/chat/completions"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": self.api_key}
+
+        payload = kwargs.copy()
+        if 'extra_body' in payload:
+            payload.update(payload.pop('extra_body'))
+
+        async with httpx.AsyncClient(timeout=300) as client:
+            response = await client.post(endpoint, headers=headers, params=params, json=payload, timeout=None)
+            response.raise_for_status()
+            return FakeChatCompletion(response.json())


 class OpenAIDirectHandler:
     """Handles OpenAI Direct mode operations including client creation and response processing."""

-    def __init__(self, credential_manager):
+    def __init__(self, credential_manager=None, express_key_manager=None):
         self.credential_manager = credential_manager
+        self.express_key_manager = express_key_manager
         self.safety_settings = [
             {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
             {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
@@ -35,7 +127,7 @@ class OpenAIDirectHandler:
             {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
             {"category": 'HARM_CATEGORY_CIVIC_INTEGRITY', "threshold": 'OFF'}
         ]
-
+
     def create_openai_client(self, project_id: str, gcp_token: str, location: str = "global") -> openai.AsyncOpenAI:
         """Create an OpenAI client configured for Vertex AI endpoint."""
         endpoint_url = (
@@ -80,7 +172,7 @@ class OpenAIDirectHandler:

     async def handle_streaming_response(
         self,
-        openai_client: openai.AsyncOpenAI,
+        openai_client: Any,  # Can be openai.AsyncOpenAI or our wrapper
         openai_params: Dict[str, Any],
         openai_extra_body: Dict[str, Any],
         request: OpenAIRequest
@@ -107,7 +199,7 @@ class OpenAIDirectHandler:

     async def _true_stream_generator(
         self,
-        openai_client: openai.AsyncOpenAI,
+        openai_client: Any,  # Can be openai.AsyncOpenAI or our wrapper
         openai_params: Dict[str, Any],
         openai_extra_body: Dict[str, Any],
         request: OpenAIRequest
@@ -136,6 +228,7 @@ class OpenAIDirectHandler:
                     delta = choices[0].get('delta')
                     if delta and isinstance(delta, dict):
                         # Always remove extra_content if present
+
                         if 'extra_content' in delta:
                             del delta['extra_content']

@@ -242,7 +335,7 @@ class OpenAIDirectHandler:

     async def handle_non_streaming_response(
         self,
-        openai_client: openai.AsyncOpenAI,
+        openai_client: Any,  # Can be openai.AsyncOpenAI or our wrapper
         openai_params: Dict[str, Any],
         openai_extra_body: Dict[str, Any],
         request: OpenAIRequest
@@ -296,44 +389,55 @@ class OpenAIDirectHandler:
                 content=create_openai_error_response(500, error_msg, "server_error")
             )

-    async def process_request(self, request: OpenAIRequest, base_model_name: str):
+    async def process_request(self, request: OpenAIRequest, base_model_name: str, is_express: bool = False):
         """Main entry point for processing OpenAI Direct mode requests."""
-        print(f"INFO: Using OpenAI Direct Path for model: {request.model}")
-
-        # Get credentials
-        rotated_credentials, rotated_project_id = self.credential_manager.get_credentials()
-
-        if not rotated_credentials or not rotated_project_id:
-            error_msg = "OpenAI Direct Mode requires GCP credentials, but none were available or loaded successfully."
-            print(f"ERROR: {error_msg}")
-            return JSONResponse(
-                status_code=500,
-                content=create_openai_error_response(500, error_msg, "server_error")
-            )
-
-        print(f"INFO: [OpenAI Direct Path] Using credentials for project: {rotated_project_id}")
-        gcp_token = _refresh_auth(rotated_credentials)
+        print(f"INFO: Using OpenAI Direct Path for model: {request.model} (Express: {is_express})")

-        if not gcp_token:
-            error_msg = f"Failed to obtain valid GCP token for OpenAI client (Project: {rotated_project_id})."
+        client: Any = None  # Can be openai.AsyncOpenAI or our wrapper
+
+        try:
+            if is_express:
+                if not self.express_key_manager:
+                    raise Exception("Express mode requires an ExpressKeyManager, but it was not provided.")
+
+                key_tuple = self.express_key_manager.get_express_api_key()
+                if not key_tuple:
+                    raise Exception("OpenAI Express Mode requires an API key, but none were available.")
+
+                _, express_api_key = key_tuple
+                project_id = await discover_project_id(express_api_key)
+
+                client = ExpressClientWrapper(project_id=project_id, api_key=express_api_key)
+                print(f"INFO: [OpenAI Express Path] Using ExpressClientWrapper for project: {project_id}")
+
+            else:  # Standard SA-based OpenAI SDK Path
+                if not self.credential_manager:
+                    raise Exception("Standard OpenAI Direct mode requires a CredentialManager.")
+
+                rotated_credentials, rotated_project_id = self.credential_manager.get_credentials()
+                if not rotated_credentials or not rotated_project_id:
+                    raise Exception("OpenAI Direct Mode requires GCP credentials, but none were available.")
+
+                print(f"INFO: [OpenAI Direct Path] Using credentials for project: {rotated_project_id}")
+                gcp_token = _refresh_auth(rotated_credentials)
+                if not gcp_token:
+                    raise Exception(f"Failed to obtain valid GCP token for OpenAI client (Project: {rotated_project_id}).")
+
+                client = self.create_openai_client(rotated_project_id, gcp_token)
+
+            model_id = f"google/{base_model_name}"
+            openai_params = self.prepare_openai_params(request, model_id)
+            openai_extra_body = self.prepare_extra_body()
+
+            if request.stream:
+                return await self.handle_streaming_response(
+                    client, openai_params, openai_extra_body, request
+                )
+            else:
+                return await self.handle_non_streaming_response(
+                    client, openai_params, openai_extra_body, request
+                )
+        except Exception as e:
+            error_msg = f"Error in process_request for {request.model}: {e}"
             print(f"ERROR: {error_msg}")
-            return JSONResponse(
-                status_code=500,
-                content=create_openai_error_response(500, error_msg, "server_error")
-            )
-
-        # Create client and prepare parameters
-        openai_client = self.create_openai_client(rotated_project_id, gcp_token)
-        model_id = f"google/{base_model_name}"
-        openai_params = self.prepare_openai_params(request, model_id)
-        openai_extra_body = self.prepare_extra_body()
-
-        # Handle streaming vs non-streaming
-        if request.stream:
-            return await self.handle_streaming_response(
-                openai_client, openai_params, openai_extra_body, request
-            )
-        else:
-            return await self.handle_non_streaming_response(
-                openai_client, openai_params, openai_extra_body, request
-            )
+            return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
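
Note: because ExpressClientWrapper aliases both self.chat and self.completions to itself, call sites written against the OpenAI SDK's client.chat.completions.create(...) keep working unchanged. A minimal usage sketch with hypothetical values (in the handler the project ID comes from discover_project_id() and the key from the ExpressKeyManager):

import asyncio

async def demo():
    # Hypothetical project and key, for illustration only.
    client = ExpressClientWrapper(project_id="example-project", api_key="example-key")

    # chat/completions are aliases for the wrapper itself, so this resolves
    # to ExpressClientWrapper.create(), which POSTs to
    # {base_url}/chat/completions?key=<api_key> via httpx.
    completion = await client.chat.completions.create(
        model="google/gemini-2.0-flash",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
    )
    print(completion.model_dump())

asyncio.run(demo())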
app/routes/chat_api.py
CHANGED

@@ -46,9 +46,10 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
     is_openai_direct_model = False
     if request.model.endswith(OPENAI_DIRECT_SUFFIX):
         temp_name_for_marker_check = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
-        # An OpenAI model can be prefixed with PAY or contain EXP
-        if temp_name_for_marker_check.startswith(PAY_PREFIX) or \
-           EXPERIMENTAL_MARKER in temp_name_for_marker_check:
+        # An OpenAI model can be prefixed with PAY, EXPRESS, or contain EXP
+        if temp_name_for_marker_check.startswith(PAY_PREFIX) or \
+           temp_name_for_marker_check.startswith(EXPRESS_PREFIX) or \
+           EXPERIMENTAL_MARKER in temp_name_for_marker_check:
             is_openai_direct_model = True
     is_auto_model = request.model.endswith("-auto")
     is_grounded_search = request.model.endswith("-search")
@@ -175,8 +176,12 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api

     if is_openai_direct_model:
         # Use the new OpenAI handler
-        openai_handler = OpenAIDirectHandler(credential_manager_instance)
-        return await openai_handler.process_request(request, base_model_name)
+        if is_express_model_request:
+            openai_handler = OpenAIDirectHandler(express_key_manager=express_key_manager_instance)
+            return await openai_handler.process_request(request, base_model_name, is_express=True)
+        else:
+            openai_handler = OpenAIDirectHandler(credential_manager=credential_manager_instance)
+            return await openai_handler.process_request(request, base_model_name)
     elif is_auto_model:
         print(f"Processing auto model: {request.model}")
         attempts = [
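
Note: the -openai suffix is stripped before the prefix check, so a model such as "[EXPRESS] gemini-2.5-flash-openai" still carries its express prefix when the markers are tested. A standalone sketch of the routing predicate (constants inlined; values as defined in models_api.py):

OPENAI_DIRECT_SUFFIX = "-openai"
PAY_PREFIX = "[PAY]"
EXPRESS_PREFIX = "[EXPRESS] "
EXPERIMENTAL_MARKER = "-exp-"

def is_openai_direct(model: str) -> bool:
    # Strip the suffix first, then look for the routing markers.
    if not model.endswith(OPENAI_DIRECT_SUFFIX):
        return False
    name = model[:-len(OPENAI_DIRECT_SUFFIX)]
    return (name.startswith(PAY_PREFIX)
            or name.startswith(EXPRESS_PREFIX)
            or EXPERIMENTAL_MARKER in name)

assert is_openai_direct("[EXPRESS] gemini-2.5-flash-openai")   # express path
assert is_openai_direct("[PAY]gemini-2.5-pro-openai")          # SA path
assert not is_openai_direct("[EXPRESS] gemini-2.5-flash")      # no -openai suffix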
app/routes/models_api.py
CHANGED

@@ -1,10 +1,10 @@
 import time
 from fastapi import APIRouter, Depends, Request
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Set
 from auth import get_api_key
 from model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
 import config as app_config
 from credentials_manager import CredentialManager

 router = APIRouter()

@@ -12,10 +12,10 @@ router = APIRouter()
 async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_key)):
     await refresh_models_config_cache()

-    OPENAI_DIRECT_SUFFIX = "-openai"
-    EXPERIMENTAL_MARKER = "-exp-"
     PAY_PREFIX = "[PAY]"
-
+    EXPRESS_PREFIX = "[EXPRESS] "
+    OPENAI_DIRECT_SUFFIX = "-openai"
+
     credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
     express_key_manager_instance = fastapi_request.app.state.express_key_manager

@@ -25,109 +25,49 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_key)):
     raw_vertex_models = await get_vertex_models()
     raw_express_models = await get_vertex_express_models()

-    if has_express_key:
-        candidate_model_ids.update(raw_express_models)
-        # If *only* express key is available, only express models (and their variants) should be listed.
-        # The current `vertex_model_ids` from remote config might contain non-express models.
-        # The `get_vertex_express_models()` should be the source of truth for express-eligible base models.
-        if not has_sa_creds:
-            # Only list models that are explicitly in the express list.
-            # Suffix generation will apply only to these if they are not gemini-2.0
-            all_model_ids = set(raw_express_models)
-        else:
-            # Both SA and Express are available, combine all known models
-            all_model_ids = set(raw_vertex_models + raw_express_models)
-    elif has_sa_creds:
-        # Only SA creds available, use all vertex_models (which might include express-eligible ones)
-        all_model_ids = set(raw_vertex_models)
-    else:
-        # No credentials available
-        all_model_ids = set()
-
-    # Create extended model list with variations (search, encrypt, auto etc.)
-    # This logic might need to be more sophisticated based on actual supported features per base model.
-    # For now, let's assume for each base model, we might have these variations.
-    # A better approach would be if the remote config specified these variations.
-
-    dynamic_models_data: List[Dict[str, Any]] = []
+    final_model_list: List[Dict[str, Any]] = []
+    processed_ids: Set[str] = set()
     current_time = int(time.time())

-    for original_model_id in all_model_ids:
-        current_display_prefix = ""
-        # Only add PAY_PREFIX if the model is not already an EXPRESS model (which has its own prefix)
-        # Apply PAY_PREFIX if SA creds are present, it's a model from raw_vertex_models,
-        # it's not experimental, and not already an EXPRESS model.
-        if has_sa_creds and \
-           original_model_id in raw_vertex_models_set and \
-           EXPERIMENTAL_MARKER not in original_model_id and \
-           not original_model_id.startswith("[EXPRESS]"):
-            current_display_prefix = PAY_PREFIX
-
-        final_display_id = f"{current_display_prefix}{original_model_id}"
-
-        dynamic_models_data.append({
-            "id": final_display_id, "object": "model", "created": current_time, "owned_by": "google",
-            "permission": [], "root": original_model_id, "parent": None
-        })
-
-        # Conditionally add common variations (standard suffixes)
-        if not original_model_id.startswith("gemini-2.0"): # Suffix rules based on original_model_id
-            standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
-            for suffix in standard_suffixes:
-                # Suffix is applied to the original model ID part
-                suffixed_model_part = f"{original_model_id}{suffix}"
-                # Then the whole thing is prefixed
-                final_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
-
-                # Check if this suffixed ID is already in all_model_ids (unlikely with prefix) or already added
-                if final_suffixed_display_id not in all_model_ids and not any(m['id'] == final_suffixed_display_id for m in dynamic_models_data):
-                    dynamic_models_data.append({
-                        "id": final_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
-                        "permission": [], "root": original_model_id, "parent": None
-                    })
-
-        # Apply special suffixes for models starting with "gemini-2.5-flash" or containing "gemini-2.5-pro"
-        # This includes both regular and EXPRESS versions
-        if "gemini-2.5-flash" in original_model_id or "gemini-2.5-pro" in original_model_id: # Suffix rules based on original_model_id
-            special_thinking_suffixes = ["-nothinking", "-max"]
-            for special_suffix in special_thinking_suffixes:
-                suffixed_model_part = f"{original_model_id}{special_suffix}"
-                final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
-
-    # model_list = list(final_models_data_map.values())
-    # model_list.sort()
-
-    return {"object": "list", "data": sorted(dynamic_models_data, key=lambda x: x['id'])}
+    def add_model_and_variants(base_id: str, prefix: str):
+        """Adds a model and its variants to the list if not already present."""
+
+        # Define all possible suffixes for a given model
+        suffixes = [""]  # For the base model itself
+        if not base_id.startswith("gemini-2.0"):
+            suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
+        if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" in base_id:
+            suffixes.extend(["-nothinking", "-max"])
+
+        # Add the openai variant for all models
+        suffixes.append(OPENAI_DIRECT_SUFFIX)
+
+        for suffix in suffixes:
+            model_id_with_suffix = f"{base_id}{suffix}"
+
+            # Experimental models have no prefix
+            final_id = f"{prefix}{model_id_with_suffix}" if "-exp-" not in base_id else model_id_with_suffix
+
+            if final_id not in processed_ids:
+                final_model_list.append({
+                    "id": final_id,
+                    "object": "model",
+                    "created": current_time,
+                    "owned_by": "google",
+                    "permission": [],
+                    "root": base_id,
+                    "parent": None
+                })
+                processed_ids.add(final_id)
+
+    # Process Express Key models first
+    if has_express_key:
+        for model_id in raw_express_models:
+            add_model_and_variants(model_id, EXPRESS_PREFIX)
+
+    # Process Service Account (PAY) models, they have lower priority
+    if has_sa_creds:
+        for model_id in raw_vertex_models:
+            add_model_and_variants(model_id, PAY_PREFIX)
+
+    return {"object": "list", "data": sorted(final_model_list, key=lambda x: x['id'])}
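
Note: a worked example of what add_model_and_variants emits for one express base model. For "gemini-2.5-flash" the suffix list expands to ["", "-search", "-encrypt", "-encrypt-full", "-auto", "-nothinking", "-max", "-openai"], each joined to the "[EXPRESS] " prefix (models containing "-exp-" would skip the prefix). Standalone sketch of the expansion:

base_id, prefix = "gemini-2.5-flash", "[EXPRESS] "

suffixes = [""]
if not base_id.startswith("gemini-2.0"):
    suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" in base_id:
    suffixes.extend(["-nothinking", "-max"])
suffixes.append("-openai")

for s in suffixes:
    print(f"{prefix}{base_id}{s}")
# [EXPRESS] gemini-2.5-flash
# [EXPRESS] gemini-2.5-flash-search
# [EXPRESS] gemini-2.5-flash-encrypt
# [EXPRESS] gemini-2.5-flash-encrypt-full
# [EXPRESS] gemini-2.5-flash-auto
# [EXPRESS] gemini-2.5-flash-nothinking
# [EXPRESS] gemini-2.5-flash-max
# [EXPRESS] gemini-2.5-flash-openai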