bibibi12345 committed on
Commit
8cbf088
·
1 Parent(s): ca416d9

fixed openai cot

Browse files
Files changed (1) hide show
  1. app/openai_handler.py +41 -33
app/openai_handler.py CHANGED
@@ -234,35 +234,47 @@ class OpenAIDirectHandler:
234
 
235
  content = delta.get('content', '')
236
  if content:
237
- # print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
238
  # Use the processor to extract reasoning
239
  processed_content, current_reasoning = reasoning_processor.process_chunk(content)
240
 
241
- # Debug logging for processing results
242
- # if processed_content or current_reasoning:
243
- # print(f"DEBUG: Chunk {chunk_count} - Processed content: '{processed_content}', Reasoning: '{current_reasoning[:50]}...' if len(current_reasoning) > 50 else '{current_reasoning}'")
244
-
245
  # Send chunks for both reasoning and content as they arrive
246
- chunks_to_send = []
247
-
248
- # If we have reasoning content, send it
 
249
  if current_reasoning:
250
- reasoning_chunk = chunk_as_dict.copy()
251
- reasoning_chunk['choices'][0]['delta'] = {'reasoning_content': current_reasoning}
252
- chunks_to_send.append(reasoning_chunk)
 
 
 
 
253
 
254
- # If we have regular content, send it
255
  if processed_content:
256
- content_chunk = chunk_as_dict.copy()
257
- content_chunk['choices'][0]['delta'] = {'content': processed_content}
258
- chunks_to_send.append(content_chunk)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  has_sent_content = True
260
 
261
- # Send all chunks
262
- for chunk_to_send in chunks_to_send:
263
- yield f"data: {json.dumps(chunk_to_send)}\n\n"
264
- else:
265
- # Still yield the chunk even if no content (could have other delta fields)
266
  yield f"data: {json.dumps(chunk_as_dict)}\n\n"
267
  else:
268
  # Yield chunks without choices too (they might contain metadata)
@@ -282,44 +294,41 @@ class OpenAIDirectHandler:
282
  # print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
283
  # f"inside_tag: {reasoning_processor.inside_tag}, "
284
  # f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
285
-
286
  # Flush any remaining buffered content
287
  remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
288
 
289
  # Send any remaining reasoning first
290
  if remaining_reasoning:
291
- # print(f"DEBUG: Flushing remaining reasoning: '{remaining_reasoning[:50]}...' if len(remaining_reasoning) > 50 else '{remaining_reasoning}'")
292
- reasoning_chunk = {
293
- "id": f"chatcmpl-{int(time.time())}",
294
  "object": "chat.completion.chunk",
295
  "created": int(time.time()),
296
  "model": request.model,
297
  "choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
298
  }
299
- yield f"data: {json.dumps(reasoning_chunk)}\n\n"
300
 
301
  # Send any remaining content
302
  if remaining_content:
303
- # print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
304
- final_chunk = {
305
- "id": f"chatcmpl-{int(time.time())}",
306
  "object": "chat.completion.chunk",
307
  "created": int(time.time()),
308
  "model": request.model,
309
  "choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
310
  }
311
- yield f"data: {json.dumps(final_chunk)}\n\n"
312
  has_sent_content = True
313
 
314
  # Always send a finish reason chunk
315
- finish_chunk = {
316
- "id": f"chatcmpl-{int(time.time())}",
317
  "object": "chat.completion.chunk",
318
  "created": int(time.time()),
319
  "model": request.model,
320
  "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
321
  }
322
- yield f"data: {json.dumps(finish_chunk)}\n\n"
323
 
324
  yield "data: [DONE]\n\n"
325
 
@@ -422,7 +431,6 @@ class OpenAIDirectHandler:
422
  gcp_token = _refresh_auth(rotated_credentials)
423
  if not gcp_token:
424
  raise Exception(f"Failed to obtain valid GCP token for OpenAI client (Project: {rotated_project_id}).")
425
-
426
  client = self.create_openai_client(rotated_project_id, gcp_token)
427
 
428
  model_id = f"google/{base_model_name}"
 
234
 
235
  content = delta.get('content', '')
236
  if content:
 
237
  # Use the processor to extract reasoning
238
  processed_content, current_reasoning = reasoning_processor.process_chunk(content)
239
 
 
 
 
 
240
  # Send chunks for both reasoning and content as they arrive
241
+ original_choice = chunk_as_dict['choices'][0]
242
+ original_finish_reason = original_choice.get('finish_reason')
243
+ original_usage = original_choice.get('usage')
244
+
245
  if current_reasoning:
246
+ reasoning_delta = {'reasoning_content': current_reasoning}
247
+ reasoning_payload = {
248
+ "id": chunk_as_dict["id"], "object": chunk_as_dict["object"],
249
+ "created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
250
+ "choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
251
+ }
252
+ yield f"data: {json.dumps(reasoning_payload)}\n\n"
253
 
 
254
  if processed_content:
255
+ content_delta = {'content': processed_content}
256
+ finish_reason_for_this_content_delta = None
257
+ usage_for_this_content_delta = None
258
+
259
+ if original_finish_reason and not reasoning_processor.inside_tag:
260
+ finish_reason_for_this_content_delta = original_finish_reason
261
+ if original_usage:
262
+ usage_for_this_content_delta = original_usage
263
+
264
+ content_payload = {
265
+ "id": chunk_as_dict["id"], "object": chunk_as_dict["object"],
266
+ "created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
267
+ "choices": [{"index": 0, "delta": content_delta, "finish_reason": finish_reason_for_this_content_delta}]
268
+ }
269
+ if usage_for_this_content_delta:
270
+ content_payload['choices'][0]['usage'] = usage_for_this_content_delta
271
+
272
+ yield f"data: {json.dumps(content_payload)}\n\n"
273
  has_sent_content = True
274
 
275
+ elif original_choice.get('finish_reason'): # Check original_choice for finish_reason
276
+ yield f"data: {json.dumps(chunk_as_dict)}\n\n"
277
+ elif not content and not original_choice.get('finish_reason') :
 
 
278
  yield f"data: {json.dumps(chunk_as_dict)}\n\n"
279
  else:
280
  # Yield chunks without choices too (they might contain metadata)
 
294
  # print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
295
  # f"inside_tag: {reasoning_processor.inside_tag}, "
296
  # f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
 
297
  # Flush any remaining buffered content
298
  remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
299
 
300
  # Send any remaining reasoning first
301
  if remaining_reasoning:
302
+ reasoning_flush_payload = {
303
+ "id": f"chatcmpl-flush-{int(time.time())}",
 
304
  "object": "chat.completion.chunk",
305
  "created": int(time.time()),
306
  "model": request.model,
307
  "choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
308
  }
309
+ yield f"data: {json.dumps(reasoning_flush_payload)}\n\n"
310
 
311
  # Send any remaining content
312
  if remaining_content:
313
+ content_flush_payload = {
314
+ "id": f"chatcmpl-flush-{int(time.time())}",
 
315
  "object": "chat.completion.chunk",
316
  "created": int(time.time()),
317
  "model": request.model,
318
  "choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
319
  }
320
+ yield f"data: {json.dumps(content_flush_payload)}\n\n"
321
  has_sent_content = True
322
 
323
  # Always send a finish reason chunk
324
+ finish_payload = {
325
+ "id": f"chatcmpl-final-{int(time.time())}", # Kilo Code: Changed ID for clarity
326
  "object": "chat.completion.chunk",
327
  "created": int(time.time()),
328
  "model": request.model,
329
  "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
330
  }
331
+ yield f"data: {json.dumps(finish_payload)}\n\n"
332
 
333
  yield "data: [DONE]\n\n"
334
 
 
431
  gcp_token = _refresh_auth(rotated_credentials)
432
  if not gcp_token:
433
  raise Exception(f"Failed to obtain valid GCP token for OpenAI client (Project: {rotated_project_id}).")
 
434
  client = self.create_openai_client(rotated_project_id, gcp_token)
435
 
436
  model_id = f"google/{base_model_name}"