Commit cdf27f4
Parent(s): 71235a6
added reasoning support

- app/message_processing.py +102 -39
- app/routes/chat_api.py +66 -6
app/message_processing.py
CHANGED
@@ -342,38 +342,81 @@ def convert_to_openai_format(gemini_response, model: str) -> Dict[str, Any]:
342 |
343 |       if hasattr(gemini_response, 'candidates') and gemini_response.candidates:
344 |           for i, candidate in enumerate(gemini_response.candidates):
345 | -
346 | -
347 | -
348 | -             elif hasattr(candidate, 'content') and hasattr(candidate.content, 'parts'):
349 | -                 # Ensure content remains a string even if parts have None text
350 | -                 parts_texts = []
351 | -                 for part_item in candidate.content.parts:
352 | -                     if hasattr(part_item, 'text') and part_item.text is not None:
353 | -                         parts_texts.append(part_item.text)
354 | -                 content = "".join(parts_texts)
355 |
356 |               if is_encrypt_full:
357 | -
358 |
359 |               choices.append({
360 |                   "index": i,
361 | -                 "message":
362 | -                 "finish_reason": "stop"
363 |               })
364 |       elif hasattr(gemini_response, 'text'):
365 | -
366 |           if is_encrypt_full:
367 | -
368 |           choices.append({
369 |               "index": 0,
370 | -             "message": {"role": "assistant", "content":
371 |               "finish_reason": "stop"
372 |           })
373 | -     else:
374 |           choices.append({
375 |               "index": 0,
376 | -             "message": {"role": "assistant", "content": ""},
377 |               "finish_reason": "stop"
378 |           })
379 |
@@ -395,32 +438,49 @@ def convert_to_openai_format(gemini_response, model: str) -> Dict[str, Any]:
395 |   def convert_chunk_to_openai(chunk, model: str, response_id: str, candidate_index: int = 0) -> str:
396 |       """Converts Gemini stream chunk to OpenAI format, applying deobfuscation if needed."""
397 |       is_encrypt_full = model.endswith("-encrypt-full")
398 | -
399 |
400 |       try:
401 | -         if hasattr(
402 | -
403 | -
404 | -                     # Ensure part_item.text exists, is not None, and convert to string
405 |                       if hasattr(part_item, 'text') and part_item.text is not None:
406 | -
407 | -
408 | -
409 | -
410 | -
411 | -
412 |       except Exception as e_chunk_extract:
413 | -
414 | -
415 | -         chunk_content_str = ""  # Default to empty string in case of any error
416 |
417 | -
418 | -
419 |
420 |       if is_encrypt_full:
421 | -
422 |
423 | -     finish_reason = None
424 |       # Actual finish reason handling would be more complex if Gemini provides it mid-stream
425 |
426 |       chunk_data = {
@@ -431,13 +491,16 @@ def convert_chunk_to_openai(chunk, model: str, response_id: str, candidate_index
431 |           "choices": [
432 |               {
433 |                   "index": candidate_index,
434 | -                 "delta":
435 |                   "finish_reason": finish_reason
436 |               }
437 |           ]
438 |       }
439 | -
440 | -
441 |       return f"data: {json.dumps(chunk_data)}\n\n"
442 |
443 |   def create_final_chunk(model: str, response_id: str, candidate_count: int = 1) -> str:
342 |
343 |       if hasattr(gemini_response, 'candidates') and gemini_response.candidates:
344 |           for i, candidate in enumerate(gemini_response.candidates):
345 | +             print(candidate) # Existing print statement
346 | +             reasoning_text_parts = []
347 | +             normal_text_parts = []
348 |
349 | +             gemini_candidate_content = None
350 | +             if hasattr(candidate, 'content'):
351 | +                 gemini_candidate_content = candidate.content
352 | +
353 | +             if gemini_candidate_content:
354 | +                 try:
355 | +                     if hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
356 | +                         for part_item in gemini_candidate_content.parts:
357 | +                             part_text = ""
358 | +                             if hasattr(part_item, 'text') and part_item.text is not None:
359 | +                                 part_text = str(part_item.text)
360 | +
361 | +                             # Check for 'thought' attribute on part_item and append directly
362 | +                             if hasattr(part_item, 'thought') and part_item.thought is True:
363 | +                                 reasoning_text_parts.append(part_text)
364 | +                             else:
365 | +                                 normal_text_parts.append(part_text)
366 | +                     elif hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
367 | +                         # If no 'parts', but 'text' exists on content, it's normal content
368 | +                         normal_text_parts.append(str(gemini_candidate_content.text))
369 | +                 except Exception as e_extract:
370 | +                     print(f"WARNING: Error extracting from candidate.content: {e_extract}. Content: {str(gemini_candidate_content)[:200]}")
371 | +             # Fallback: if candidate.content is not informative, but candidate.text exists directly
372 | +             elif hasattr(candidate, 'text') and candidate.text is not None:
373 | +                 normal_text_parts.append(str(candidate.text))
374 | +
375 | +
376 | +             final_reasoning_content_str = "".join(reasoning_text_parts)
377 | +             final_normal_content_str = "".join(normal_text_parts)
378 | +
379 |               if is_encrypt_full:
380 | +                 final_reasoning_content_str = deobfuscate_text(final_reasoning_content_str)
381 | +                 final_normal_content_str = deobfuscate_text(final_normal_content_str)
382 | +
383 | +             message_payload = {"role": "assistant"}
384 | +             if final_reasoning_content_str:
385 | +                 message_payload['reasoning_content'] = final_reasoning_content_str
386 | +
387 | +             # Ensure 'content' key is present, even if empty or None, as per OpenAI spec for assistant messages
388 | +             # if not final_normal_content_str and not final_reasoning_content_str:
389 | +             #     message_payload['content'] = ""
390 | +             # elif final_reasoning_content_str and not final_normal_content_str:
391 | +             #     message_payload['content'] = None
392 | +             # else: # final_normal_content_str has content
393 | +             #     message_payload['content'] = final_normal_content_str
394 | +
395 | +             # Simplified logic for content: always include it. If it was empty, it'll be empty string.
396 | +             # If only reasoning was present, content will be empty string.
397 | +             message_payload['content'] = final_normal_content_str
398 | +
399 |
400 |               choices.append({
401 |                   "index": i,
402 | +                 "message": message_payload,
403 | +                 "finish_reason": "stop" # Assuming "stop" as Gemini doesn't always map directly
404 |               })
405 | +
406 | +     # This elif handles cases where gemini_response itself might be a simple text response
407 |       elif hasattr(gemini_response, 'text'):
408 | +         content_str = gemini_response.text or ""
409 |           if is_encrypt_full:
410 | +             content_str = deobfuscate_text(content_str)
411 |           choices.append({
412 |               "index": 0,
413 | +             "message": {"role": "assistant", "content": content_str},
414 |               "finish_reason": "stop"
415 |           })
416 | +     else: # Fallback for empty or unexpected response structure
417 |           choices.append({
418 |               "index": 0,
419 | +             "message": {"role": "assistant", "content": ""}, # Ensure content key
420 |               "finish_reason": "stop"
421 |           })
422 |
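For reference, a minimal sketch (not part of the commit) of how the new extraction loop in convert_to_openai_format separates thought parts from normal parts. The SimpleNamespace stand-ins are hypothetical objects that only expose the attributes the loop checks ('parts', 'text', 'thought'); they are not the Gemini SDK types.

# Illustrative sketch only: mimic a candidate whose first part is a "thought".
from types import SimpleNamespace

candidate = SimpleNamespace(
    content=SimpleNamespace(
        parts=[
            SimpleNamespace(text="Let me think about the question... ", thought=True),
            SimpleNamespace(text="The answer is 42.", thought=False),
        ]
    )
)

reasoning_text_parts, normal_text_parts = [], []
for part_item in candidate.content.parts:
    part_text = str(part_item.text) if getattr(part_item, "text", None) is not None else ""
    if getattr(part_item, "thought", False) is True:
        reasoning_text_parts.append(part_text)
    else:
        normal_text_parts.append(part_text)

message_payload = {"role": "assistant"}
if reasoning_text_parts:
    message_payload["reasoning_content"] = "".join(reasoning_text_parts)
message_payload["content"] = "".join(normal_text_parts)

print(message_payload)
# {'role': 'assistant', 'reasoning_content': 'Let me think about the question... ',
#  'content': 'The answer is 42.'}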
438 |   def convert_chunk_to_openai(chunk, model: str, response_id: str, candidate_index: int = 0) -> str:
439 |       """Converts Gemini stream chunk to OpenAI format, applying deobfuscation if needed."""
440 |       is_encrypt_full = model.endswith("-encrypt-full")
441 | +
442 | +     # This is original_chunk.candidates[0].content after your reassignment
443 | +     gemini_content_part = chunk.candidates[0].content
444 |
445 | +     reasoning_text_parts = []
446 | +     normal_text_parts = []
447 | +
448 |       try:
449 | +         if hasattr(gemini_content_part, 'parts') and gemini_content_part.parts:
450 | +             for part_item in gemini_content_part.parts:
451 | +                 part_text = ""
452 |                   if hasattr(part_item, 'text') and part_item.text is not None:
453 | +                     part_text = str(part_item.text)
454 | +
455 | +                 # Check for the 'thought' attribute on the part_item itself and append directly
456 | +                 if hasattr(part_item, 'thought') and part_item.thought is True: # Corrected to 'thought'
457 | +                     reasoning_text_parts.append(part_text)
458 | +                 else:
459 | +                     normal_text_parts.append(part_text)
460 | +         elif hasattr(gemini_content_part, 'text') and gemini_content_part.text is not None:
461 | +             # If no 'parts', but 'text' exists, it's normal content
462 | +             normal_text_parts.append(str(gemini_content_part.text))
463 | +         # If gemini_content_part has neither .parts nor .text, or if .text is None, both lists remain empty
464 |       except Exception as e_chunk_extract:
465 | +         print(f"WARNING: Error extracting content from Gemini content part in convert_chunk_to_openai: {e_chunk_extract}. Content part type: {type(gemini_content_part)}. Data: {str(gemini_content_part)[:200]}")
466 | +         # Fallback to empty if extraction fails, lists will remain empty
467 |
468 | +     final_reasoning_content_str = "".join(reasoning_text_parts)
469 | +     final_normal_content_str = "".join(normal_text_parts)
470 |
471 |       if is_encrypt_full:
472 | +         final_reasoning_content_str = deobfuscate_text(final_reasoning_content_str)
473 | +         final_normal_content_str = deobfuscate_text(final_normal_content_str)
474 | +
475 | +     # Construct delta payload
476 | +     delta_payload = {}
477 | +     if final_reasoning_content_str: # Only add if there's content
478 | +         delta_payload['reasoning_content'] = final_reasoning_content_str
479 | +     if final_normal_content_str: # Only add if there's content
480 | +         delta_payload['content'] = final_normal_content_str
481 | +     # If both are empty, delta_payload will be an empty dict {}, which is valid for OpenAI stream (empty update)
482 |
483 | +     finish_reason = None
484 |       # Actual finish reason handling would be more complex if Gemini provides it mid-stream
485 |
486 |       chunk_data = {
491 |           "choices": [
492 |               {
493 |                   "index": candidate_index,
494 | +                 "delta": delta_payload, # Use the new delta_payload
495 |                   "finish_reason": finish_reason
496 |               }
497 |           ]
498 |       }
499 | +     # Note: The original 'chunk' variable in the broader scope was the full Gemini GenerateContentResponse chunk.
500 | +     # The 'logprobs' would be on the candidate, not on gemini_content_part.
501 | +     # We need to access logprobs from the original chunk's candidate.
502 | +     if hasattr(chunk, 'candidates') and chunk.candidates and hasattr(chunk.candidates[0], 'logprobs'):
503 | +         chunk_data["choices"][0]["logprobs"] = getattr(chunk.candidates[0], 'logprobs', None)
504 |       return f"data: {json.dumps(chunk_data)}\n\n"
505 |
506 |   def create_final_chunk(model: str, response_id: str, candidate_count: int = 1) -> str:
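A sketch of the SSE lines this version of convert_chunk_to_openai would emit for a thought part versus a normal part. The fake parts are again hypothetical stand-ins, and the chunk_data here is trimmed down to only the fields visible in this hunk; the project's real chunk carries additional fields.

# Illustrative sketch only: route each fake part into the delta key the diff uses.
import json
from types import SimpleNamespace

def sse_line(delta_payload, finish_reason=None):
    # Trimmed-down chunk_data: only the fields visible in the hunk above.
    chunk_data = {"choices": [{"index": 0, "delta": delta_payload, "finish_reason": finish_reason}]}
    return f"data: {json.dumps(chunk_data)}\n\n"

thought_part = SimpleNamespace(text="Weighing both interpretations...", thought=True)
answer_part = SimpleNamespace(text="Here is the final answer.", thought=False)

for part in (thought_part, answer_part):
    key = "reasoning_content" if part.thought else "content"
    print(sse_line({key: part.text}), end="")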
app/routes/chat_api.py
CHANGED
@@ -228,16 +228,42 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
228 |               )
229 |               async for chunk in stream_response:
230 |                   try:
231 | -
232 | -
233 | -
234 |                       print(f"ERROR: {error_msg_chunk}")
235 |                       # Truncate
236 |                       if len(error_msg_chunk) > 1024:
237 |                           error_msg_chunk = error_msg_chunk[:1024] + "..."
238 |                       error_response_chunk = create_openai_error_response(500, error_msg_chunk, "server_error")
239 | -                     json_payload_for_chunk_error = json.dumps(error_response_chunk)
240 | -                     print(f"DEBUG: Yielding chunk
241 |                       yield f"data: {json_payload_for_chunk_error}\n\n"
242 |                       yield "data: [DONE]\n\n"
243 |                       return # Stop further processing for this request
@@ -263,7 +289,41 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
263 |                   **openai_params,
264 |                   extra_body=openai_extra_body
265 |               )
266 | -
267 |           except Exception as generate_error:
268 |               error_msg_generate = f"Error calling OpenAI client for {request.model}: {str(generate_error)}"
269 |               print(f"ERROR: {error_msg_generate}")
228 |               )
229 |               async for chunk in stream_response:
230 |                   try:
231 | +                     chunk_as_dict = chunk.model_dump(exclude_unset=True, exclude_none=True)
232 | +                     print(chunk_as_dict)
233 | +
234 | +                     # Safely navigate and check for thought flag
235 | +                     choices = chunk_as_dict.get('choices')
236 | +                     if choices and isinstance(choices, list) and len(choices) > 0:
237 | +                         delta = choices[0].get('delta')
238 | +                         if delta and isinstance(delta, dict):
239 | +                             extra_content = delta.get('extra_content')
240 | +                             if isinstance(extra_content, dict):
241 | +                                 google_content = extra_content.get('google')
242 | +                                 if isinstance(google_content, dict) and google_content.get('thought') is True:
243 | +                                     # This is a thought chunk, modify chunk_as_dict's delta in place
244 | +                                     reasoning_text = delta.get('content')
245 | +                                     if reasoning_text is not None:
246 | +                                         delta['reasoning_content'] = reasoning_text
247 | +
248 | +                                     if 'content' in delta:
249 | +                                         del delta['content']
250 | +
251 | +                                     # Always delete extra_content for thought chunks
252 | +                                     if 'extra_content' in delta:
253 | +                                         del delta['extra_content']
254 | +
255 | +                     # Yield the (potentially modified) dictionary as JSON
256 | +                     yield f"data: {json.dumps(chunk_as_dict)}\n\n"
257 | +
258 | +                 except Exception as chunk_processing_error: # Catch errors from dict manipulation or json.dumps
259 | +                     error_msg_chunk = f"Error processing or serializing OpenAI chunk for {request.model}: {str(chunk_processing_error)}. Chunk: {str(chunk)[:200]}"
260 |                       print(f"ERROR: {error_msg_chunk}")
261 |                       # Truncate
262 |                       if len(error_msg_chunk) > 1024:
263 |                           error_msg_chunk = error_msg_chunk[:1024] + "..."
264 |                       error_response_chunk = create_openai_error_response(500, error_msg_chunk, "server_error")
265 | +                     json_payload_for_chunk_error = json.dumps(error_response_chunk) # Ensure json is imported
266 | +                     print(f"DEBUG: Yielding chunk processing error JSON payload (OpenAI path): {json_payload_for_chunk_error}")
267 |                       yield f"data: {json_payload_for_chunk_error}\n\n"
268 |                       yield "data: [DONE]\n\n"
269 |                       return # Stop further processing for this request
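A sketch of the streaming rewrite above applied to a hand-made chunk dictionary; the 'extra_content': {'google': {'thought': True}} shape is assumed from the checks in this hunk rather than taken from any Vertex documentation.

# Illustrative sketch only: move 'content' into 'reasoning_content' for a thought chunk.
import json

chunk_as_dict = {
    "choices": [{
        "index": 0,
        "delta": {
            "content": "Considering edge cases first...",
            "extra_content": {"google": {"thought": True}},
        },
        "finish_reason": None,
    }]
}

choices = chunk_as_dict.get("choices")
if choices and isinstance(choices, list) and len(choices) > 0:
    delta = choices[0].get("delta")
    if isinstance(delta, dict):
        google_content = (delta.get("extra_content") or {}).get("google")
        if isinstance(google_content, dict) and google_content.get("thought") is True:
            if delta.get("content") is not None:
                delta["reasoning_content"] = delta.pop("content")
            delta.pop("extra_content", None)

# The delta now carries 'reasoning_content' instead of 'content' / 'extra_content'.
print(f"data: {json.dumps(chunk_as_dict)}\n\n", end="")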
289 |                   **openai_params,
290 |                   extra_body=openai_extra_body
291 |               )
292 | +             response_dict = response.model_dump(exclude_unset=True, exclude_none=True)
293 | +
294 | +             # Process reasoning_tokens for non-streaming response
295 | +             try:
296 | +                 usage = response_dict.get('usage')
297 | +                 if usage and isinstance(usage, dict):
298 | +                     completion_details = usage.get('completion_tokens_details')
299 | +                     if completion_details and isinstance(completion_details, dict):
300 | +                         num_reasoning_tokens = completion_details.get('reasoning_tokens')
301 | +
302 | +                         if isinstance(num_reasoning_tokens, int) and num_reasoning_tokens > 0:
303 | +                             choices = response_dict.get('choices')
304 | +                             if choices and isinstance(choices, list) and len(choices) > 0:
305 | +                                 # Ensure choices[0] and message are dicts, model_dump makes them so
306 | +                                 message_dict = choices[0].get('message')
307 | +                                 if message_dict and isinstance(message_dict, dict):
308 | +                                     full_content = message_dict.get('content')
309 | +                                     if isinstance(full_content, str): # Ensure content is a string
310 | +                                         reasoning_text = full_content[:num_reasoning_tokens]
311 | +                                         actual_content = full_content[num_reasoning_tokens:]
312 | +
313 | +                                         message_dict['reasoning_content'] = reasoning_text
314 | +                                         message_dict['content'] = actual_content
315 | +
316 | +                                         # Clean up Vertex-specific field
317 | +                                         del completion_details['reasoning_tokens']
318 | +                                         if not completion_details: # If dict is now empty
319 | +                                             del usage['completion_tokens_details']
320 | +                                         if not usage: # If dict is now empty
321 | +                                             del response_dict['usage']
322 | +             except Exception as e_non_stream_reasoning:
323 | +                 print(f"WARNING: Could not process non-streaming reasoning tokens for model {request.model}: {e_non_stream_reasoning}. Response will be returned as is from Vertex.")
324 | +                 # Fallthrough to return response_dict as is if processing fails
325 | +
326 | +             return JSONResponse(content=response_dict)
327 |           except Exception as generate_error:
328 |               error_msg_generate = f"Error calling OpenAI client for {request.model}: {str(generate_error)}"
329 |               print(f"ERROR: {error_msg_generate}")