Commit 0527a50
Parent: ef7065d

fixed bugs

Files changed:
- app/api_helpers.py (+1 -1)
- app/routes/chat_api.py (+11 -19)
app/api_helpers.py
@@ -254,7 +254,7 @@ def is_gemini_response_valid(response: Any) -> bool:
     if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
         return True
 
-    # Check for candidates
+    # Check for candidates in the response
     if hasattr(response, 'candidates') and response.candidates:
         for candidate in response.candidates:
             # Check for direct text on candidate
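The api_helpers.py change only rewords a comment, but the context lines show the validator's shape: accept the response if it carries non-empty top-level text, otherwise walk response.candidates. A minimal sketch of that pattern, reconstructed from the context lines here rather than the repo's full implementation (the candidate and part attributes beyond those lines are assumptions based on the google-genai response types):

from typing import Any

def is_gemini_response_valid(response: Any) -> bool:
    # Accept a response with non-empty top-level text.
    if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
        return True

    # Check for candidates in the response
    if hasattr(response, 'candidates') and response.candidates:
        for candidate in response.candidates:
            # Check for direct text on candidate
            if isinstance(getattr(candidate, 'text', None), str) and candidate.text.strip():
                return True
            # Assumed continuation: fall back to text carried on content.parts
            content = getattr(candidate, 'content', None)
            for part in (getattr(content, 'parts', None) or []):
                if isinstance(getattr(part, 'text', None), str) and part.text.strip():
                    return True
    return False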
app/routes/chat_api.py
@@ -24,7 +24,7 @@ from api_helpers import (
     execute_gemini_call,
 )
 from openai_handler import OpenAIDirectHandler
-from
+from project_id_discovery import discover_project_id
 
 router = APIRouter()
 
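Two notes on this hunk. The removed import on old line 27 is truncated in this capture (only "from" survives); the DirectVertexClient removals later in the commit suggest it imported that client, but the module path is not recoverable. The added import pulls discover_project_id from project_id_discovery, a module this commit does not show. Its call site below takes the Express API key and returns the project ID used to build a URL path, so one plausible shape is an async helper that probes the API and caches the result per key. The sketch below is hypothetical throughout: the probe URL, the empty-body POST, and the error-parsing regex are all assumptions, not code from the repo.

# Hypothetical sketch of project_id_discovery.discover_project_id.
import re
import aiohttp

_cache: dict[str, str] = {}  # api_key -> discovered project id

async def discover_project_id(api_key: str) -> str:
    if api_key in _cache:
        return _cache[api_key]
    # Assumption: probing Vertex with a bogus model makes the API echo the
    # project bound to this Express key inside its error payload.
    url = ("https://aiplatform.googleapis.com/v1/publishers/google/models/"
           "nonexistent-model:generateContent")
    async with aiohttp.ClientSession() as session:
        async with session.post(url, params={"key": api_key}, json={}) as resp:
            body = await resp.text()
    match = re.search(r"projects/([^/\s]+)", body)
    if not match:
        raise RuntimeError("could not discover a project id for this API key")
    _cache[api_key] = match.group(1)
    return _cache[api_key]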
@@ -118,9 +118,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             try:
                 # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
                 if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
-
-
-
+                    project_id = await discover_project_id(key_val)
+                    base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
+                    client_to_use = genai.Client(
+                        vertexai=True,
+                        api_key=key_val,
+                        http_options=types.HttpOptions(base_url=base_url)
+                    )
+                    client_to_use._api_client._http_options.api_version = None
+                    print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode with custom base URL for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
                 else:
                     client_to_use = genai.Client(vertexai=True, api_key=key_val)
                     print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
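The replacement branch (new lines 121-129; the content of the three removed lines did not survive in this capture) builds a stock genai.Client pinned to a project-scoped global Vertex endpoint instead of going through a hand-rolled client. Because the base_url already contains the /v1 path segment, resetting the private api_version presumably keeps the SDK from inserting a second version segment into request paths. Pulled out as a standalone sketch (the helper name is ours, and the private-attribute poke is copied from the diff, so it may break across google-genai releases):

from google import genai
from google.genai import types

def make_express_client(api_key: str, project_id: str) -> genai.Client:
    # Route all SDK traffic through the project-scoped global endpoint.
    base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
    client = genai.Client(
        vertexai=True,
        api_key=api_key,
        http_options=types.HttpOptions(base_url=base_url),
    )
    # base_url already carries /v1, so stop the SDK from prefixing its own
    # API version (private attribute, copied from the commit).
    client._api_client._http_options.api_version = None
    return client

# Usage afterwards is the normal SDK surface, e.g.:
#   client.models.generate_content(model="gemini-2.5-pro", contents="hi")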
@@ -185,9 +191,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             try:
                 # Pass is_auto_attempt=True for auto-mode calls
                 result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
-                # Clean up DirectVertexClient session if used
-                if isinstance(client_to_use, DirectVertexClient):
-                    await client_to_use.close()
                 return result
             except Exception as e_auto:
                 last_err = e_auto
@@ -196,9 +199,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
 
         print(f"All auto attempts failed. Last error: {last_err}")
         err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
-        # Clean up DirectVertexClient session if used
-        if isinstance(client_to_use, DirectVertexClient):
-            await client_to_use.close()
         if not request.stream and last_err:
             return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
         elif request.stream:
@@ -245,17 +245,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             # but the API call might need the full "gemini-1.5-pro-search".
             # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
             # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
-            try:
-                return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
-            finally:
-                # Clean up DirectVertexClient session if used
-                if isinstance(client_to_use, DirectVertexClient):
-                    await client_to_use.close()
+            return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
 
         except Exception as e:
             error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
             print(error_msg)
-            # Clean up DirectVertexClient session if it exists
-            if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
-                await client_to_use.close()
             return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
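The remaining deletions are all one pattern: per-request cleanup of the old DirectVertexClient, which evidently managed its own HTTP session and needed an explicit await client_to_use.close() on the success path, the auto-mode failure path, and the outer exception handler. The stock genai.Client owns its transport, so with the custom client gone there is nothing to close per request; those three blocks account for most of the 19 lines removed from this file.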
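End to end, the route still speaks the OpenAI chat-completions dialect, so a quick smoke test of the new gemini-2.5-pro branch can go through any OpenAI-style client. The sketch below assumes a local deployment, a /v1/chat/completions mount, and a proxy key, none of which appear in this diff:

import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",  # assumed mount point
    headers={"Authorization": "Bearer sk-your-proxy-key"},  # assumed auth scheme
    json={
        "model": "gemini-2.5-pro",  # hits the new custom-base-URL branch
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
    timeout=60,
)
print(resp.json())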