Spaces:
Running
Running
Commit
·
8cbf088
1
Parent(s):
ca416d9
Fixed OpenAI CoT (chain-of-thought) handling
Browse files
- app/openai_handler.py +41 -33
app/openai_handler.py
CHANGED
@@ -234,35 +234,47 @@ class OpenAIDirectHandler:
|
|
234 |
|
235 |
content = delta.get('content', '')
|
236 |
if content:
|
237 |
-
# print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
|
238 |
# Use the processor to extract reasoning
|
239 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
240 |
|
241 |
-
# Debug logging for processing results
|
242 |
-
# if processed_content or current_reasoning:
|
243 |
-
# print(f"DEBUG: Chunk {chunk_count} - Processed content: '{processed_content}', Reasoning: '{current_reasoning[:50]}...' if len(current_reasoning) > 50 else '{current_reasoning}'")
|
244 |
-
|
245 |
# Send chunks for both reasoning and content as they arrive
|
246 |
-
|
247 |
-
|
248 |
-
|
|
|
249 |
if current_reasoning:
|
250 |
-
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
|
|
253 |
|
254 |
-
# If we have regular content, send it
|
255 |
if processed_content:
|
256 |
-
|
257 |
-
|
258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
has_sent_content = True
|
260 |
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
else:
|
265 |
-
# Still yield the chunk even if no content (could have other delta fields)
|
266 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
267 |
else:
|
268 |
# Yield chunks without choices too (they might contain metadata)
|
@@ -282,44 +294,41 @@ class OpenAIDirectHandler:
|
|
282 |
# print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
|
283 |
# f"inside_tag: {reasoning_processor.inside_tag}, "
|
284 |
# f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
|
285 |
-
|
286 |
# Flush any remaining buffered content
|
287 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
288 |
|
289 |
# Send any remaining reasoning first
|
290 |
if remaining_reasoning:
|
291 |
-
|
292 |
-
|
293 |
-
"id": f"chatcmpl-{int(time.time())}",
|
294 |
"object": "chat.completion.chunk",
|
295 |
"created": int(time.time()),
|
296 |
"model": request.model,
|
297 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
298 |
}
|
299 |
-
yield f"data: {json.dumps(
|
300 |
|
301 |
# Send any remaining content
|
302 |
if remaining_content:
|
303 |
-
|
304 |
-
|
305 |
-
"id": f"chatcmpl-{int(time.time())}",
|
306 |
"object": "chat.completion.chunk",
|
307 |
"created": int(time.time()),
|
308 |
"model": request.model,
|
309 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
310 |
}
|
311 |
-
yield f"data: {json.dumps(
|
312 |
has_sent_content = True
|
313 |
|
314 |
# Always send a finish reason chunk
|
315 |
-
|
316 |
-
"id": f"chatcmpl-{int(time.time())}",
|
317 |
"object": "chat.completion.chunk",
|
318 |
"created": int(time.time()),
|
319 |
"model": request.model,
|
320 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
321 |
}
|
322 |
-
yield f"data: {json.dumps(
|
323 |
|
324 |
yield "data: [DONE]\n\n"
|
325 |
|
@@ -422,7 +431,6 @@ class OpenAIDirectHandler:
|
|
422 |
gcp_token = _refresh_auth(rotated_credentials)
|
423 |
if not gcp_token:
|
424 |
raise Exception(f"Failed to obtain valid GCP token for OpenAI client (Project: {rotated_project_id}).")
|
425 |
-
|
426 |
client = self.create_openai_client(rotated_project_id, gcp_token)
|
427 |
|
428 |
model_id = f"google/{base_model_name}"
|
|
|
234 |
|
235 |
content = delta.get('content', '')
|
236 |
if content:
|
|
|
237 |
# Use the processor to extract reasoning
|
238 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
239 |
|
|
|
|
|
|
|
|
|
240 |
# Send chunks for both reasoning and content as they arrive
|
241 |
+
original_choice = chunk_as_dict['choices'][0]
|
242 |
+
original_finish_reason = original_choice.get('finish_reason')
|
243 |
+
original_usage = original_choice.get('usage')
|
244 |
+
|
245 |
if current_reasoning:
|
246 |
+
reasoning_delta = {'reasoning_content': current_reasoning}
|
247 |
+
reasoning_payload = {
|
248 |
+
"id": chunk_as_dict["id"], "object": chunk_as_dict["object"],
|
249 |
+
"created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
|
250 |
+
"choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
|
251 |
+
}
|
252 |
+
yield f"data: {json.dumps(reasoning_payload)}\n\n"
|
253 |
|
|
|
254 |
if processed_content:
|
255 |
+
content_delta = {'content': processed_content}
|
256 |
+
finish_reason_for_this_content_delta = None
|
257 |
+
usage_for_this_content_delta = None
|
258 |
+
|
259 |
+
if original_finish_reason and not reasoning_processor.inside_tag:
|
260 |
+
finish_reason_for_this_content_delta = original_finish_reason
|
261 |
+
if original_usage:
|
262 |
+
usage_for_this_content_delta = original_usage
|
263 |
+
|
264 |
+
content_payload = {
|
265 |
+
"id": chunk_as_dict["id"], "object": chunk_as_dict["object"],
|
266 |
+
"created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
|
267 |
+
"choices": [{"index": 0, "delta": content_delta, "finish_reason": finish_reason_for_this_content_delta}]
|
268 |
+
}
|
269 |
+
if usage_for_this_content_delta:
|
270 |
+
content_payload['choices'][0]['usage'] = usage_for_this_content_delta
|
271 |
+
|
272 |
+
yield f"data: {json.dumps(content_payload)}\n\n"
|
273 |
has_sent_content = True
|
274 |
|
275 |
+
elif original_choice.get('finish_reason'): # Check original_choice for finish_reason
|
276 |
+
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
277 |
+
elif not content and not original_choice.get('finish_reason') :
|
|
|
|
|
278 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
279 |
else:
|
280 |
# Yield chunks without choices too (they might contain metadata)
|
|
|
294 |
# print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
|
295 |
# f"inside_tag: {reasoning_processor.inside_tag}, "
|
296 |
# f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
|
|
|
297 |
# Flush any remaining buffered content
|
298 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
299 |
|
300 |
# Send any remaining reasoning first
|
301 |
if remaining_reasoning:
|
302 |
+
reasoning_flush_payload = {
|
303 |
+
"id": f"chatcmpl-flush-{int(time.time())}",
|
|
|
304 |
"object": "chat.completion.chunk",
|
305 |
"created": int(time.time()),
|
306 |
"model": request.model,
|
307 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
308 |
}
|
309 |
+
yield f"data: {json.dumps(reasoning_flush_payload)}\n\n"
|
310 |
|
311 |
# Send any remaining content
|
312 |
if remaining_content:
|
313 |
+
content_flush_payload = {
|
314 |
+
"id": f"chatcmpl-flush-{int(time.time())}",
|
|
|
315 |
"object": "chat.completion.chunk",
|
316 |
"created": int(time.time()),
|
317 |
"model": request.model,
|
318 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
319 |
}
|
320 |
+
yield f"data: {json.dumps(content_flush_payload)}\n\n"
|
321 |
has_sent_content = True
|
322 |
|
323 |
# Always send a finish reason chunk
|
324 |
+
finish_payload = {
|
325 |
+
"id": f"chatcmpl-final-{int(time.time())}", # Kilo Code: Changed ID for clarity
|
326 |
"object": "chat.completion.chunk",
|
327 |
"created": int(time.time()),
|
328 |
"model": request.model,
|
329 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
330 |
}
|
331 |
+
yield f"data: {json.dumps(finish_payload)}\n\n"
|
332 |
|
333 |
yield "data: [DONE]\n\n"
|
334 |
|
|
|
431 |
gcp_token = _refresh_auth(rotated_credentials)
|
432 |
if not gcp_token:
|
433 |
raise Exception(f"Failed to obtain valid GCP token for OpenAI client (Project: {rotated_project_id}).")
|
|
|
434 |
client = self.create_openai_client(rotated_project_id, gcp_token)
|
435 |
|
436 |
model_id = f"google/{base_model_name}"
|