bibibi12345 committed
Commit cdf27f4 · Parent: 71235a6

added reasoning support

Files changed (2):
  1. app/message_processing.py +102 -39
  2. app/routes/chat_api.py +66 -6
app/message_processing.py CHANGED
@@ -342,38 +342,81 @@ def convert_to_openai_format(gemini_response, model: str) -> Dict[str, Any]:
 
     if hasattr(gemini_response, 'candidates') and gemini_response.candidates:
         for i, candidate in enumerate(gemini_response.candidates):
-            content = ""
-            if hasattr(candidate, 'text'):
-                content = candidate.text or "" # Coalesce None to empty string
-            elif hasattr(candidate, 'content') and hasattr(candidate.content, 'parts'):
-                # Ensure content remains a string even if parts have None text
-                parts_texts = []
-                for part_item in candidate.content.parts:
-                    if hasattr(part_item, 'text') and part_item.text is not None:
-                        parts_texts.append(part_item.text)
-                content = "".join(parts_texts)
+            print(candidate) # Debug: log the raw candidate
+            reasoning_text_parts = []
+            normal_text_parts = []
+
+            gemini_candidate_content = None
+            if hasattr(candidate, 'content'):
+                gemini_candidate_content = candidate.content
+
+            if gemini_candidate_content:
+                try:
+                    if hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
+                        for part_item in gemini_candidate_content.parts:
+                            part_text = ""
+                            if hasattr(part_item, 'text') and part_item.text is not None:
+                                part_text = str(part_item.text)
+
+                            # Parts flagged with 'thought' carry reasoning; everything else is normal content
+                            if hasattr(part_item, 'thought') and part_item.thought is True:
+                                reasoning_text_parts.append(part_text)
+                            else:
+                                normal_text_parts.append(part_text)
+                    elif hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
+                        # If no 'parts' but 'text' exists on content, it's normal content
+                        normal_text_parts.append(str(gemini_candidate_content.text))
+                except Exception as e_extract:
+                    print(f"WARNING: Error extracting from candidate.content: {e_extract}. Content: {str(gemini_candidate_content)[:200]}")
+            # Fallback: candidate.content was not informative, but candidate.text exists directly
+            elif hasattr(candidate, 'text') and candidate.text is not None:
+                normal_text_parts.append(str(candidate.text))
+
+            final_reasoning_content_str = "".join(reasoning_text_parts)
+            final_normal_content_str = "".join(normal_text_parts)
 
             if is_encrypt_full:
-                content = deobfuscate_text(content)
+                final_reasoning_content_str = deobfuscate_text(final_reasoning_content_str)
+                final_normal_content_str = deobfuscate_text(final_normal_content_str)
+
+            message_payload = {"role": "assistant"}
+            if final_reasoning_content_str:
+                message_payload['reasoning_content'] = final_reasoning_content_str
+
+            # Always include 'content', as the OpenAI spec expects it on assistant
+            # messages; if only reasoning was present, it is the empty string.
+            message_payload['content'] = final_normal_content_str
 
             choices.append({
                 "index": i,
-                "message": {"role": "assistant", "content": content},
-                "finish_reason": "stop"
+                "message": message_payload,
+                "finish_reason": "stop" # Gemini finish reasons don't always map directly
             })
+
+    # This elif handles cases where gemini_response itself is a simple text response
     elif hasattr(gemini_response, 'text'):
-        content = gemini_response.text or "" # Coalesce None to empty string
+        content_str = gemini_response.text or ""
         if is_encrypt_full:
-            content = deobfuscate_text(content) # deobfuscate_text should also be robust to empty string
+            content_str = deobfuscate_text(content_str)
         choices.append({
             "index": 0,
-            "message": {"role": "assistant", "content": content},
+            "message": {"role": "assistant", "content": content_str},
             "finish_reason": "stop"
         })
-    else:
+    else: # Fallback for empty or unexpected response structure
         choices.append({
             "index": 0,
-            "message": {"role": "assistant", "content": ""},
+            "message": {"role": "assistant", "content": ""}, # Ensure 'content' key is present
             "finish_reason": "stop"
         })
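To see the split this hunk introduces end to end, here is a minimal, self-contained sketch (not code from this repo); SimpleNamespace stands in for real google-genai Part objects, and the `thought` flag marking reasoning parts is the same assumption the hunk's check makes:

# Sketch of the part-splitting rule above, using stand-in objects.
from types import SimpleNamespace

parts = [
    SimpleNamespace(text="Let me check the units first...", thought=True),
    SimpleNamespace(text="The answer is 42.", thought=False),
]

reasoning_text_parts, normal_text_parts = [], []
for part_item in parts:
    if getattr(part_item, 'thought', False) is True:
        reasoning_text_parts.append(part_item.text)
    else:
        normal_text_parts.append(part_item.text)

message_payload = {"role": "assistant", "content": "".join(normal_text_parts)}
if reasoning_text_parts:
    message_payload['reasoning_content'] = "".join(reasoning_text_parts)

print(message_payload)
# {'role': 'assistant', 'content': 'The answer is 42.',
#  'reasoning_content': 'Let me check the units first...'}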
@@ -395,32 +438,49 @@ def convert_to_openai_format(gemini_response, model: str) -> Dict[str, Any]:
 def convert_chunk_to_openai(chunk, model: str, response_id: str, candidate_index: int = 0) -> str:
     """Converts Gemini stream chunk to OpenAI format, applying deobfuscation if needed."""
     is_encrypt_full = model.endswith("-encrypt-full")
-    chunk_content_str = "" # Renamed for clarity and to ensure it's always a string
+
+    # The content of the chunk's first candidate
+    gemini_content_part = chunk.candidates[0].content
+
+    reasoning_text_parts = []
+    normal_text_parts = []
 
     try:
-        if hasattr(chunk, 'parts') and chunk.parts:
-            current_parts_texts = []
-            for part_item in chunk.parts:
-                # Ensure part_item.text exists, is not None, and convert to string
+        if hasattr(gemini_content_part, 'parts') and gemini_content_part.parts:
+            for part_item in gemini_content_part.parts:
+                part_text = ""
                 if hasattr(part_item, 'text') and part_item.text is not None:
-                    current_parts_texts.append(str(part_item.text))
-            chunk_content_str = "".join(current_parts_texts)
-        elif hasattr(chunk, 'text') and chunk.text is not None:
-            # Ensure chunk.text is converted to string if it's not None
-            chunk_content_str = str(chunk.text)
-        # If chunk has neither .parts nor .text, or if .text is None, chunk_content_str remains ""
+                    part_text = str(part_item.text)
+
+                # Parts flagged with 'thought' carry reasoning; everything else is normal content
+                if hasattr(part_item, 'thought') and part_item.thought is True:
+                    reasoning_text_parts.append(part_text)
+                else:
+                    normal_text_parts.append(part_text)
+        elif hasattr(gemini_content_part, 'text') and gemini_content_part.text is not None:
+            # If no 'parts' but 'text' exists, it's normal content
+            normal_text_parts.append(str(gemini_content_part.text))
+        # If gemini_content_part has neither .parts nor .text, or if .text is None, both lists remain empty
     except Exception as e_chunk_extract:
-        # Log the error and the problematic chunk structure
-        print(f"WARNING: Error extracting content from chunk in convert_chunk_to_openai: {e_chunk_extract}. Chunk type: {type(chunk)}. Chunk data: {str(chunk)[:200]}")
-        chunk_content_str = "" # Default to empty string in case of any error
+        print(f"WARNING: Error extracting content from Gemini content part in convert_chunk_to_openai: {e_chunk_extract}. Content part type: {type(gemini_content_part)}. Data: {str(gemini_content_part)[:200]}")
+        # Fall back to empty output if extraction fails; both lists remain empty
 
-    if is_encrypt_full:
-        chunk_content_str = deobfuscate_text(chunk_content_str) # deobfuscate_text should handle empty string
+    final_reasoning_content_str = "".join(reasoning_text_parts)
+    final_normal_content_str = "".join(normal_text_parts)
 
     if is_encrypt_full:
-        chunk_content = deobfuscate_text(chunk_content)
+        final_reasoning_content_str = deobfuscate_text(final_reasoning_content_str)
+        final_normal_content_str = deobfuscate_text(final_normal_content_str)
+
+    # Construct the delta payload; keys are added only when non-empty
+    delta_payload = {}
+    if final_reasoning_content_str:
+        delta_payload['reasoning_content'] = final_reasoning_content_str
+    if final_normal_content_str:
+        delta_payload['content'] = final_normal_content_str
+    # If both are empty, delta_payload is {}, a valid empty update in an OpenAI stream
 
     finish_reason = None
     # Actual finish reason handling would be more complex if Gemini provides it mid-stream
 
     chunk_data = {
@@ -431,13 +491,16 @@ def convert_chunk_to_openai(chunk, model: str, response_id: str, candidate_index: int = 0) -> str:
         "choices": [
             {
                 "index": candidate_index,
-                "delta": {**({"content": chunk_content_str} if chunk_content_str else {})},
+                "delta": delta_payload,
                 "finish_reason": finish_reason
             }
         ]
     }
-    if hasattr(chunk, 'logprobs'):
-        chunk_data["choices"][0]["logprobs"] = getattr(chunk, 'logprobs', None)
+    # 'logprobs' lives on the candidate of the original Gemini chunk, not on the
+    # extracted content part, so read it from the first candidate
+    if hasattr(chunk, 'candidates') and chunk.candidates and hasattr(chunk.candidates[0], 'logprobs'):
+        chunk_data["choices"][0]["logprobs"] = getattr(chunk.candidates[0], 'logprobs', None)
     return f"data: {json.dumps(chunk_data)}\n\n"
 
 def create_final_chunk(model: str, response_id: str, candidate_count: int = 1) -> str:
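The streaming converter above only adds the keys that actually have content. A condensed sketch of what ends up on the wire, with placeholder id and model values:

import json

def build_delta(reasoning: str, content: str) -> dict:
    # Mirrors the hunk above: keys are added only when non-empty; an empty
    # dict is a valid empty update in an OpenAI-style stream.
    delta = {}
    if reasoning:
        delta['reasoning_content'] = reasoning
    if content:
        delta['content'] = content
    return delta

for delta in (build_delta("Weighing both options...", ""),
              build_delta("", "Option A is better."),
              build_delta("", "")):
    chunk_data = {
        "id": "chatcmpl-xyz",               # placeholder response id
        "object": "chat.completion.chunk",
        "model": "gemini-2.5-pro",          # placeholder model name
        "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
    }
    print(f"data: {json.dumps(chunk_data)}\n")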
 
app/routes/chat_api.py CHANGED
@@ -228,16 +228,42 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
         )
         async for chunk in stream_response:
             try:
-                yield f"data: {chunk.model_dump_json()}\n\n"
-            except Exception as chunk_serialization_error:
-                error_msg_chunk = f"Error serializing OpenAI chunk for {request.model}: {str(chunk_serialization_error)}. Chunk: {str(chunk)[:200]}"
+                chunk_as_dict = chunk.model_dump(exclude_unset=True, exclude_none=True)
+                print(chunk_as_dict) # Debug: log each streamed chunk dict
+
+                # Safely navigate and check for the thought flag
+                choices = chunk_as_dict.get('choices')
+                if choices and isinstance(choices, list) and len(choices) > 0:
+                    delta = choices[0].get('delta')
+                    if delta and isinstance(delta, dict):
+                        extra_content = delta.get('extra_content')
+                        if isinstance(extra_content, dict):
+                            google_content = extra_content.get('google')
+                            if isinstance(google_content, dict) and google_content.get('thought') is True:
+                                # This is a thought chunk; move its content into
+                                # reasoning_content, modifying the delta in place
+                                reasoning_text = delta.get('content')
+                                if reasoning_text is not None:
+                                    delta['reasoning_content'] = reasoning_text
+                                if 'content' in delta:
+                                    del delta['content']
+                                # Always drop extra_content from thought chunks
+                                if 'extra_content' in delta:
+                                    del delta['extra_content']
+
+                # Yield the (potentially modified) dictionary as JSON
+                yield f"data: {json.dumps(chunk_as_dict)}\n\n"
+
+            except Exception as chunk_processing_error: # Errors from dict manipulation or json.dumps
+                error_msg_chunk = f"Error processing or serializing OpenAI chunk for {request.model}: {str(chunk_processing_error)}. Chunk: {str(chunk)[:200]}"
                 print(f"ERROR: {error_msg_chunk}")
                 # Truncate
                 if len(error_msg_chunk) > 1024:
                     error_msg_chunk = error_msg_chunk[:1024] + "..."
                 error_response_chunk = create_openai_error_response(500, error_msg_chunk, "server_error")
                 json_payload_for_chunk_error = json.dumps(error_response_chunk)
-                print(f"DEBUG: Yielding chunk serialization error JSON payload (OpenAI path): {json_payload_for_chunk_error}")
+                print(f"DEBUG: Yielding chunk processing error JSON payload (OpenAI path): {json_payload_for_chunk_error}")
                 yield f"data: {json_payload_for_chunk_error}\n\n"
                 yield "data: [DONE]\n\n"
                 return # Stop further processing for this request
263
  **openai_params,
264
  extra_body=openai_extra_body
265
  )
266
- return JSONResponse(content=response.model_dump(exclude_unset=True))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  except Exception as generate_error:
268
  error_msg_generate = f"Error calling OpenAI client for {request.model}: {str(generate_error)}"
269
  print(f"ERROR: {error_msg_generate}")
 