bibibi12345 committed on
Commit
3a3173c
·
1 Parent(s): 48c8917

fixed openai mode thinking

Browse files
app/api_helpers.py CHANGED
@@ -254,7 +254,7 @@ def is_gemini_response_valid(response: Any) -> bool:
254
  if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
255
  return True
256
 
257
- # Check for candidates in the response
258
  if hasattr(response, 'candidates') and response.candidates:
259
  for candidate in response.candidates:
260
  # Check for direct text on candidate
 
254
  if hasattr(response, 'text') and isinstance(response.text, str) and response.text.strip():
255
  return True
256
 
257
+ # Check for candidates (both SDK and DirectVertexClient responses)
258
  if hasattr(response, 'candidates') and response.candidates:
259
  for candidate in response.candidates:
260
  # Check for direct text on candidate
app/openai_handler.py CHANGED
@@ -71,8 +71,9 @@ class OpenAIDirectHandler:
71
  'google': {
72
  'safety_settings': self.safety_settings,
73
  'thought_tag_marker': VERTEX_REASONING_TAG,
74
- 'thinking_config ': {"include_thoughts": True}
75
-
 
76
  }
77
  }
78
  }
 
71
  'google': {
72
  'safety_settings': self.safety_settings,
73
  'thought_tag_marker': VERTEX_REASONING_TAG,
74
+ "thinking_config": {
75
+ "include_thoughts": True
76
+ }
77
  }
78
  }
79
  }
app/routes/chat_api.py CHANGED
@@ -24,7 +24,7 @@ from api_helpers import (
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
- from project_id_discovery import discover_project_id
28
 
29
  router = APIRouter()
30
 
@@ -118,15 +118,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
118
  try:
119
  # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
120
  if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
121
- project_id = await discover_project_id(key_val)
122
- base_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global"
123
- client_to_use = genai.Client(
124
- vertexai=True,
125
- api_key=key_val,
126
- http_options=types.HttpOptions(base_url=base_url)
127
- )
128
- client_to_use._api_client._http_options.api_version = None
129
- print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode with custom base URL for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
130
  else:
131
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
132
  print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
@@ -191,6 +185,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
191
  try:
192
  # Pass is_auto_attempt=True for auto-mode calls
193
  result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
 
 
 
194
  return result
195
  except Exception as e_auto:
196
  last_err = e_auto
@@ -199,6 +196,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
199
 
200
  print(f"All auto attempts failed. Last error: {last_err}")
201
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
 
 
 
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
@@ -245,9 +245,17 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
- return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
 
 
 
 
 
249
 
250
  except Exception as e:
251
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
252
  print(error_msg)
 
 
 
253
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
 
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
+ from direct_vertex_client import DirectVertexClient
28
 
29
  router = APIRouter()
30
 
 
118
  try:
119
  # Check if model contains "gemini-2.5-pro" or "gemini-2.5-flash" for direct URL approach
120
  if "gemini-2.5-pro" in base_model_name or "gemini-2.5-flash" in base_model_name:
121
+ client_to_use = DirectVertexClient(api_key=key_val)
122
+ await client_to_use.discover_project_id()
123
+ print(f"INFO: Attempt {attempt+1}/{total_keys} - Using DirectVertexClient for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
 
 
 
 
 
124
  else:
125
  client_to_use = genai.Client(vertexai=True, api_key=key_val)
126
  print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
185
  try:
186
  # Pass is_auto_attempt=True for auto-mode calls
187
  result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
188
+ # Clean up DirectVertexClient session if used
189
+ if isinstance(client_to_use, DirectVertexClient):
190
+ await client_to_use.close()
191
  return result
192
  except Exception as e_auto:
193
  last_err = e_auto
 
196
 
197
  print(f"All auto attempts failed. Last error: {last_err}")
198
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
199
+ # Clean up DirectVertexClient session if used
200
+ if isinstance(client_to_use, DirectVertexClient):
201
+ await client_to_use.close()
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
 
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
+ try:
249
+ return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
250
+ finally:
251
+ # Clean up DirectVertexClient session if used
252
+ if isinstance(client_to_use, DirectVertexClient):
253
+ await client_to_use.close()
254
 
255
  except Exception as e:
256
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
257
  print(error_msg)
258
+ # Clean up DirectVertexClient session if it exists
259
+ if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
260
+ await client_to_use.close()
261
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))