Spaces:
Running
Running
Commit
·
433bff2
1
Parent(s):
c86c9be
tentative tool call support
Browse files
- app/openai_handler.py +33 -18
app/openai_handler.py
CHANGED
@@ -233,10 +233,12 @@ class OpenAIDirectHandler:
|
|
233 |
del delta['extra_content']
|
234 |
|
235 |
content = delta.get('content', '')
|
|
|
236 |
if content:
|
237 |
# print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
|
238 |
# Use the processor to extract reasoning
|
239 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
|
|
240 |
|
241 |
# Debug logging for processing results
|
242 |
# if processed_content or current_reasoning:
|
@@ -247,23 +249,27 @@ class OpenAIDirectHandler:
|
|
247 |
|
248 |
# If we have reasoning content, send it
|
249 |
if current_reasoning:
|
250 |
-
|
251 |
-
|
252 |
-
chunks_to_send.append(
|
253 |
|
254 |
# If we have regular content, send it
|
255 |
if processed_content:
|
256 |
-
|
257 |
-
|
258 |
-
chunks_to_send.append(
|
259 |
has_sent_content = True
|
260 |
|
261 |
# Send all chunks
|
262 |
-
for
|
263 |
-
|
|
|
|
|
264 |
else:
|
265 |
# Still yield the chunk even if no content (could have other delta fields)
|
266 |
-
|
|
|
|
|
267 |
else:
|
268 |
# Yield chunks without choices too (they might contain metadata)
|
269 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
@@ -285,43 +291,52 @@ class OpenAIDirectHandler:
|
|
285 |
|
286 |
# Flush any remaining buffered content
|
287 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
|
|
288 |
|
289 |
# Send any remaining reasoning first
|
290 |
if remaining_reasoning:
|
291 |
# print(f"DEBUG: Flushing remaining reasoning: '{remaining_reasoning[:50]}...' if len(remaining_reasoning) > 50 else '{remaining_reasoning}'")
|
292 |
-
|
293 |
-
"id": f"chatcmpl-{int(time.time())}",
|
294 |
"object": "chat.completion.chunk",
|
295 |
"created": int(time.time()),
|
296 |
"model": request.model,
|
297 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
298 |
}
|
299 |
-
|
|
|
|
|
300 |
|
301 |
# Send any remaining content
|
302 |
if remaining_content:
|
303 |
# print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
|
304 |
-
|
305 |
-
"id": f"chatcmpl-{int(time.time())}",
|
306 |
"object": "chat.completion.chunk",
|
307 |
"created": int(time.time()),
|
308 |
"model": request.model,
|
309 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
310 |
}
|
311 |
-
|
|
|
|
|
312 |
has_sent_content = True
|
313 |
|
314 |
# Always send a finish reason chunk
|
315 |
-
|
316 |
"id": f"chatcmpl-{int(time.time())}",
|
317 |
"object": "chat.completion.chunk",
|
318 |
"created": int(time.time()),
|
319 |
"model": request.model,
|
320 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
321 |
}
|
322 |
-
|
|
|
|
|
323 |
|
324 |
-
|
|
|
|
|
325 |
|
326 |
except Exception as stream_error:
|
327 |
error_msg = str(stream_error)
|
|
|
233 |
del delta['extra_content']
|
234 |
|
235 |
content = delta.get('content', '')
|
236 |
+
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Original delta content: '{content}'") # Kilo Code Added Log
|
237 |
if content:
|
238 |
# print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
|
239 |
# Use the processor to extract reasoning
|
240 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
241 |
+
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Processed by StreamingProcessor: processed_content='{processed_content}', current_reasoning='{current_reasoning}'") # Kilo Code Added Log
|
242 |
|
243 |
# Debug logging for processing results
|
244 |
# if processed_content or current_reasoning:
|
|
|
249 |
|
250 |
# If we have reasoning content, send it
|
251 |
if current_reasoning:
|
252 |
+
reasoning_chunk_payload = chunk_as_dict.copy()
|
253 |
+
reasoning_chunk_payload['choices'][0]['delta'] = {'reasoning_content': current_reasoning}
|
254 |
+
chunks_to_send.append(reasoning_chunk_payload)
|
255 |
|
256 |
# If we have regular content, send it
|
257 |
if processed_content:
|
258 |
+
content_chunk_payload = chunk_as_dict.copy()
|
259 |
+
content_chunk_payload['choices'][0]['delta'] = {'content': processed_content}
|
260 |
+
chunks_to_send.append(content_chunk_payload)
|
261 |
has_sent_content = True
|
262 |
|
263 |
# Send all chunks
|
264 |
+
for chunk_to_send_payload in chunks_to_send:
|
265 |
+
yielded_data_str = f"data: {json.dumps(chunk_to_send_payload)}\n\n"
|
266 |
+
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding from process_chunk: {yielded_data_str.strip()}") # Kilo Code Added Log
|
267 |
+
yield yielded_data_str
|
268 |
else:
|
269 |
# Still yield the chunk even if no content (could have other delta fields)
|
270 |
+
yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n"
|
271 |
+
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta no content): {yielded_data_str.strip()}") # Kilo Code Added Log
|
272 |
+
yield yielded_data_str
|
273 |
else:
|
274 |
# Yield chunks without choices too (they might contain metadata)
|
275 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
|
|
291 |
|
292 |
# Flush any remaining buffered content
|
293 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
294 |
+
print(f"DEBUG_OPENAI_STREAM: Flushed from StreamingProcessor: remaining_content='{remaining_content}', remaining_reasoning='{remaining_reasoning}'") # Kilo Code Added Log
|
295 |
|
296 |
# Send any remaining reasoning first
|
297 |
if remaining_reasoning:
|
298 |
# print(f"DEBUG: Flushing remaining reasoning: '{remaining_reasoning[:50]}...' if len(remaining_reasoning) > 50 else '{remaining_reasoning}'")
|
299 |
+
reasoning_flush_payload = {
|
300 |
+
"id": f"chatcmpl-flush-{int(time.time())}", # Kilo Code: Changed ID for clarity
|
301 |
"object": "chat.completion.chunk",
|
302 |
"created": int(time.time()),
|
303 |
"model": request.model,
|
304 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
305 |
}
|
306 |
+
yielded_data_str = f"data: {json.dumps(reasoning_flush_payload)}\n\n"
|
307 |
+
print(f"DEBUG_OPENAI_STREAM: Yielding from flush (reasoning): {yielded_data_str.strip()}") # Kilo Code Added Log
|
308 |
+
yield yielded_data_str
|
309 |
|
310 |
# Send any remaining content
|
311 |
if remaining_content:
|
312 |
# print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
|
313 |
+
content_flush_payload = {
|
314 |
+
"id": f"chatcmpl-flush-{int(time.time())}", # Kilo Code: Changed ID for clarity
|
315 |
"object": "chat.completion.chunk",
|
316 |
"created": int(time.time()),
|
317 |
"model": request.model,
|
318 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
319 |
}
|
320 |
+
yielded_data_str = f"data: {json.dumps(content_flush_payload)}\n\n"
|
321 |
+
print(f"DEBUG_OPENAI_STREAM: Yielding from flush (content): {yielded_data_str.strip()}") # Kilo Code Added Log
|
322 |
+
yield yielded_data_str
|
323 |
has_sent_content = True
|
324 |
|
325 |
# Always send a finish reason chunk
|
326 |
+
finish_payload = {
|
327 |
"id": f"chatcmpl-{int(time.time())}",
|
328 |
"object": "chat.completion.chunk",
|
329 |
"created": int(time.time()),
|
330 |
"model": request.model,
|
331 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
332 |
}
|
333 |
+
yielded_data_str = f"data: {json.dumps(finish_payload)}\n\n"
|
334 |
+
print(f"DEBUG_OPENAI_STREAM: Yielding finish chunk: {yielded_data_str.strip()}") # Kilo Code Added Log
|
335 |
+
yield yielded_data_str
|
336 |
|
337 |
+
yielded_data_str = "data: [DONE]\n\n"
|
338 |
+
print(f"DEBUG_OPENAI_STREAM: Yielding DONE: {yielded_data_str.strip()}") # Kilo Code Added Log
|
339 |
+
yield yielded_data_str
|
340 |
|
341 |
except Exception as stream_error:
|
342 |
error_msg = str(stream_error)
|