Spaces:
Building
Building
Commit
·
eef2ebb
1
Parent(s):
0da76bb
tentative tool call support
Browse files
- app/api_helpers.py +0 -2
- app/message_processing.py +0 -20
- app/openai_handler.py +13 -50
app/api_helpers.py
CHANGED
@@ -423,7 +423,6 @@ async def execute_gemini_call(
|
|
423 |
block_msg+=f" ({response_obj_call.prompt_feedback.block_reason_message})"
|
424 |
raise ValueError(block_msg)
|
425 |
|
426 |
-
print(f"DEBUG: Raw Gemini response_obj_call before conversion: {response_obj_call}") # Kilo Code Added Log
|
427 |
if not is_gemini_response_valid(response_obj_call):
|
428 |
error_details = f"Invalid non-streaming Gemini response for model string '{model_to_call}'. "
|
429 |
if hasattr(response_obj_call, 'candidates'):
|
@@ -446,5 +445,4 @@ async def execute_gemini_call(
|
|
446 |
raise ValueError(error_details)
|
447 |
|
448 |
openai_response_content = convert_to_openai_format(response_obj_call, request_obj.model)
|
449 |
-
print(f"DEBUG: OpenAI formatted response content before JSONResponse: {openai_response_content}") # Kilo Code Added Log
|
450 |
return JSONResponse(content=openai_response_content)
|
|
|
423 |
block_msg+=f" ({response_obj_call.prompt_feedback.block_reason_message})"
|
424 |
raise ValueError(block_msg)
|
425 |
|
|
|
426 |
if not is_gemini_response_valid(response_obj_call):
|
427 |
error_details = f"Invalid non-streaming Gemini response for model string '{model_to_call}'. "
|
428 |
if hasattr(response_obj_call, 'candidates'):
|
|
|
445 |
raise ValueError(error_details)
|
446 |
|
447 |
openai_response_content = convert_to_openai_format(response_obj_call, request_obj.model)
|
|
|
448 |
return JSONResponse(content=openai_response_content)
|
app/message_processing.py
CHANGED
@@ -311,29 +311,20 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
|
|
311 |
|
312 |
if gemini_candidate_content and hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
|
313 |
for part_item in gemini_candidate_content.parts:
|
314 |
-
print(f"DEBUG: Parsing part_item: {part_item}") # Kilo Code Added Log
|
315 |
if hasattr(part_item, 'function_call') and part_item.function_call is not None: # Kilo Code: Added 'is not None' check
|
316 |
-
print(f"DEBUG: part_item is a function_call, skipping for text parsing.") # Kilo Code Added Log
|
317 |
continue
|
318 |
|
319 |
part_text = ""
|
320 |
if hasattr(part_item, 'text') and part_item.text is not None:
|
321 |
part_text = str(part_item.text)
|
322 |
|
323 |
-
# Kilo Code Added Logs
|
324 |
part_is_thought = hasattr(part_item, 'thought') and part_item.thought is True
|
325 |
-
print(f"DEBUG: part_text: '{part_text}', is_thought: {part_is_thought}")
|
326 |
|
327 |
if part_is_thought:
|
328 |
reasoning_text_parts.append(part_text)
|
329 |
-
print(f"DEBUG: Appended to reasoning_text_parts. Current count: {len(reasoning_text_parts)}") # Kilo Code Added Log
|
330 |
elif part_text: # Only add if it's not a function_call and has text
|
331 |
normal_text_parts.append(part_text)
|
332 |
-
print(f"DEBUG: Appended to normal_text_parts. Current count: {len(normal_text_parts)}") # Kilo Code Added Log
|
333 |
-
else:
|
334 |
-
print(f"DEBUG: part_text is empty or not appended. is_thought: {part_is_thought}") # Kilo Code Added Log
|
335 |
elif candidate_part_text:
|
336 |
-
print(f"DEBUG: Using candidate_part_text: '{candidate_part_text}'") # Kilo Code Added Log
|
337 |
normal_text_parts.append(candidate_part_text)
|
338 |
elif gemini_candidate_content and hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
|
339 |
normal_text_parts.append(str(gemini_candidate_content.text))
|
@@ -371,12 +362,6 @@ def process_gemini_response_to_openai_dict(gemini_response_obj: Any, request_mod
|
|
371 |
for part in candidate.content.parts:
|
372 |
if hasattr(part, 'function_call') and part.function_call is not None: # Kilo Code: Added 'is not None' check
|
373 |
fc = part.function_call
|
374 |
-
# Kilo Code Added Logs
|
375 |
-
print(f"DEBUG: Processing part with function_call. Part: {part}")
|
376 |
-
print(f"DEBUG: FunctionCall object (fc): {fc}")
|
377 |
-
if fc:
|
378 |
-
print(f"DEBUG: fc.name: {getattr(fc, 'name', 'Name attribute does not exist or is None')}")
|
379 |
-
# End Kilo Code Added Logs
|
380 |
tool_call_id = f"call_{base_id}_{i}_{fc.name.replace(' ', '_')}_{int(time.time()*10000 + random.randint(0,9999))}"
|
381 |
|
382 |
if "tool_calls" not in message_payload:
|
@@ -400,9 +385,7 @@ def process_gemini_response_to_openai_dict(gemini_response_obj: Any, request_mod
|
|
400 |
reasoning_str = deobfuscate_text(reasoning_str)
|
401 |
normal_content_str = deobfuscate_text(normal_content_str)
|
402 |
|
403 |
-
print(f"DEBUG_ASSIGN: normal_content_str before assignment to message_payload: '{normal_content_str}'") # Kilo Code Added Log
|
404 |
message_payload["content"] = normal_content_str
|
405 |
-
print(f"DEBUG_ASSIGN: message_payload['content'] after assignment: '{message_payload['content']}'") # Kilo Code Added Log
|
406 |
if reasoning_str:
|
407 |
message_payload['reasoning_content'] = reasoning_str
|
408 |
|
@@ -494,12 +477,10 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
494 |
break
|
495 |
|
496 |
if not function_call_detected_in_chunk:
|
497 |
-
print(f"DEBUG_STREAM: Raw candidate list in chunk for text processing: {candidate}") # Kilo Code Added Log (Note: 'candidate' here is chunk.candidates)
|
498 |
if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
|
499 |
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
|
500 |
else:
|
501 |
reasoning_text, normal_text = "", "" # Default to empty if no candidates
|
502 |
-
print(f"DEBUG_STREAM: Parsed from chunk - reasoning_text: '{reasoning_text}', normal_text: '{normal_text}'") # Kilo Code Added Log
|
503 |
if is_encrypt_full:
|
504 |
reasoning_text = deobfuscate_text(reasoning_text)
|
505 |
normal_text = deobfuscate_text(normal_text)
|
@@ -516,7 +497,6 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
516 |
# and it's not a terminal chunk, we still send a delta with empty content.
|
517 |
delta_payload['content'] = ""
|
518 |
|
519 |
-
print(f"DEBUG_STREAM: Final delta_payload for chunk: {delta_payload}") # Kilo Code Added Log
|
520 |
chunk_data = {
|
521 |
"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
|
522 |
"choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": openai_finish_reason}]
|
|
|
311 |
|
312 |
if gemini_candidate_content and hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
|
313 |
for part_item in gemini_candidate_content.parts:
|
|
|
314 |
if hasattr(part_item, 'function_call') and part_item.function_call is not None: # Kilo Code: Added 'is not None' check
|
|
|
315 |
continue
|
316 |
|
317 |
part_text = ""
|
318 |
if hasattr(part_item, 'text') and part_item.text is not None:
|
319 |
part_text = str(part_item.text)
|
320 |
|
|
|
321 |
part_is_thought = hasattr(part_item, 'thought') and part_item.thought is True
|
|
|
322 |
|
323 |
if part_is_thought:
|
324 |
reasoning_text_parts.append(part_text)
|
|
|
325 |
elif part_text: # Only add if it's not a function_call and has text
|
326 |
normal_text_parts.append(part_text)
|
|
|
|
|
|
|
327 |
elif candidate_part_text:
|
|
|
328 |
normal_text_parts.append(candidate_part_text)
|
329 |
elif gemini_candidate_content and hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
|
330 |
normal_text_parts.append(str(gemini_candidate_content.text))
|
|
|
362 |
for part in candidate.content.parts:
|
363 |
if hasattr(part, 'function_call') and part.function_call is not None: # Kilo Code: Added 'is not None' check
|
364 |
fc = part.function_call
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
tool_call_id = f"call_{base_id}_{i}_{fc.name.replace(' ', '_')}_{int(time.time()*10000 + random.randint(0,9999))}"
|
366 |
|
367 |
if "tool_calls" not in message_payload:
|
|
|
385 |
reasoning_str = deobfuscate_text(reasoning_str)
|
386 |
normal_content_str = deobfuscate_text(normal_content_str)
|
387 |
|
|
|
388 |
message_payload["content"] = normal_content_str
|
|
|
389 |
if reasoning_str:
|
390 |
message_payload['reasoning_content'] = reasoning_str
|
391 |
|
|
|
477 |
break
|
478 |
|
479 |
if not function_call_detected_in_chunk:
|
|
|
480 |
if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
|
481 |
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
|
482 |
else:
|
483 |
reasoning_text, normal_text = "", "" # Default to empty if no candidates
|
|
|
484 |
if is_encrypt_full:
|
485 |
reasoning_text = deobfuscate_text(reasoning_text)
|
486 |
normal_text = deobfuscate_text(normal_text)
|
|
|
497 |
# and it's not a terminal chunk, we still send a delta with empty content.
|
498 |
delta_payload['content'] = ""
|
499 |
|
|
|
500 |
chunk_data = {
|
501 |
"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
|
502 |
"choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": openai_finish_reason}]
|
app/openai_handler.py
CHANGED
@@ -233,19 +233,11 @@ class OpenAIDirectHandler:
|
|
233 |
del delta['extra_content']
|
234 |
|
235 |
content = delta.get('content', '')
|
236 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Original delta content: '{content}'") # Kilo Code Added Log
|
237 |
if content:
|
238 |
-
# print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
|
239 |
# Use the processor to extract reasoning
|
240 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
241 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Processed by StreamingProcessor: processed_content='{processed_content}', current_reasoning='{current_reasoning}'") # Kilo Code Added Log
|
242 |
-
|
243 |
-
# Debug logging for processing results
|
244 |
-
# if processed_content or current_reasoning:
|
245 |
-
# print(f"DEBUG: Chunk {chunk_count} - Processed content: '{processed_content}', Reasoning: '{current_reasoning[:50]}...' if len(current_reasoning) > 50 else '{current_reasoning}'")
|
246 |
|
247 |
# Send chunks for both reasoning and content as they arrive
|
248 |
-
# Kilo Code: Revised payload construction
|
249 |
original_choice = chunk_as_dict['choices'][0]
|
250 |
original_finish_reason = original_choice.get('finish_reason')
|
251 |
original_usage = original_choice.get('usage')
|
@@ -257,15 +249,10 @@ class OpenAIDirectHandler:
|
|
257 |
"created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
|
258 |
"choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
|
259 |
}
|
260 |
-
|
261 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding REASONING: {yielded_data_str.strip()}")
|
262 |
-
yield yielded_data_str
|
263 |
|
264 |
if processed_content:
|
265 |
content_delta = {'content': processed_content}
|
266 |
-
# Determine if this processed_content chunk should carry the original finish_reason and usage.
|
267 |
-
# It should if the reasoning processor is NOT inside a tag after this,
|
268 |
-
# meaning this processed_content is the final part of any tagged content from original_content_from_delta.
|
269 |
finish_reason_for_this_content_delta = None
|
270 |
usage_for_this_content_delta = None
|
271 |
|
@@ -282,25 +269,13 @@ class OpenAIDirectHandler:
|
|
282 |
if usage_for_this_content_delta:
|
283 |
content_payload['choices'][0]['usage'] = usage_for_this_content_delta
|
284 |
|
285 |
-
|
286 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding CONTENT: {yielded_data_str.strip()}")
|
287 |
-
yield yielded_data_str
|
288 |
has_sent_content = True
|
289 |
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
elif original_finish_reason: # Original delta had no content, but had a finish_reason
|
295 |
-
# This case handles chunks that are purely for signaling stream end or other non-content states.
|
296 |
-
yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n" # Yield original chunk as is
|
297 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta no content, but has finish_reason): {yielded_data_str.strip()}")
|
298 |
-
yield yielded_data_str
|
299 |
-
# If original delta had no content and no finish_reason, it's an empty delta, yield as is.
|
300 |
-
elif not content and not original_finish_reason : # Kilo Code: Added this condition
|
301 |
-
yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n"
|
302 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta empty): {yielded_data_str.strip()}")
|
303 |
-
yield yielded_data_str
|
304 |
else:
|
305 |
# Yield chunks without choices too (they might contain metadata)
|
306 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
@@ -319,55 +294,43 @@ class OpenAIDirectHandler:
|
|
319 |
# print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
|
320 |
# f"inside_tag: {reasoning_processor.inside_tag}, "
|
321 |
# f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
|
322 |
-
|
323 |
# Flush any remaining buffered content
|
324 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
325 |
-
print(f"DEBUG_OPENAI_STREAM: Flushed from StreamingProcessor: remaining_content='{remaining_content}', remaining_reasoning='{remaining_reasoning}'") # Kilo Code Added Log
|
326 |
|
327 |
# Send any remaining reasoning first
|
328 |
if remaining_reasoning:
|
329 |
-
# print(f"DEBUG: Flushing remaining reasoning: '{remaining_reasoning[:50]}...' if len(remaining_reasoning) > 50 else '{remaining_reasoning}'")
|
330 |
reasoning_flush_payload = {
|
331 |
-
"id": f"chatcmpl-flush-{int(time.time())}",
|
332 |
"object": "chat.completion.chunk",
|
333 |
"created": int(time.time()),
|
334 |
"model": request.model,
|
335 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
336 |
}
|
337 |
-
|
338 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding from flush (reasoning): {yielded_data_str.strip()}") # Kilo Code Added Log
|
339 |
-
yield yielded_data_str
|
340 |
|
341 |
# Send any remaining content
|
342 |
if remaining_content:
|
343 |
-
# print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
|
344 |
content_flush_payload = {
|
345 |
-
"id": f"chatcmpl-flush-{int(time.time())}",
|
346 |
"object": "chat.completion.chunk",
|
347 |
"created": int(time.time()),
|
348 |
"model": request.model,
|
349 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
350 |
}
|
351 |
-
|
352 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding from flush (content): {yielded_data_str.strip()}") # Kilo Code Added Log
|
353 |
-
yield yielded_data_str
|
354 |
has_sent_content = True
|
355 |
|
356 |
# Always send a finish reason chunk
|
357 |
finish_payload = {
|
358 |
-
"id": f"chatcmpl-{int(time.time())}",
|
359 |
"object": "chat.completion.chunk",
|
360 |
"created": int(time.time()),
|
361 |
"model": request.model,
|
362 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
363 |
}
|
364 |
-
|
365 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding finish chunk: {yielded_data_str.strip()}") # Kilo Code Added Log
|
366 |
-
yield yielded_data_str
|
367 |
|
368 |
-
|
369 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding DONE: {yielded_data_str.strip()}") # Kilo Code Added Log
|
370 |
-
yield yielded_data_str
|
371 |
|
372 |
except Exception as stream_error:
|
373 |
error_msg = str(stream_error)
|
|
|
233 |
del delta['extra_content']
|
234 |
|
235 |
content = delta.get('content', '')
|
|
|
236 |
if content:
|
|
|
237 |
# Use the processor to extract reasoning
|
238 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
# Send chunks for both reasoning and content as they arrive
|
|
|
241 |
original_choice = chunk_as_dict['choices'][0]
|
242 |
original_finish_reason = original_choice.get('finish_reason')
|
243 |
original_usage = original_choice.get('usage')
|
|
|
249 |
"created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
|
250 |
"choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
|
251 |
}
|
252 |
+
yield f"data: {json.dumps(reasoning_payload)}\n\n"
|
|
|
|
|
253 |
|
254 |
if processed_content:
|
255 |
content_delta = {'content': processed_content}
|
|
|
|
|
|
|
256 |
finish_reason_for_this_content_delta = None
|
257 |
usage_for_this_content_delta = None
|
258 |
|
|
|
269 |
if usage_for_this_content_delta:
|
270 |
content_payload['choices'][0]['usage'] = usage_for_this_content_delta
|
271 |
|
272 |
+
yield f"data: {json.dumps(content_payload)}\n\n"
|
|
|
|
|
273 |
has_sent_content = True
|
274 |
|
275 |
+
elif original_choice.get('finish_reason'): # Check original_choice for finish_reason
|
276 |
+
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
277 |
+
elif not content and not original_choice.get('finish_reason') :
|
278 |
+
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
else:
|
280 |
# Yield chunks without choices too (they might contain metadata)
|
281 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
|
|
294 |
# print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
|
295 |
# f"inside_tag: {reasoning_processor.inside_tag}, "
|
296 |
# f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
|
|
|
297 |
# Flush any remaining buffered content
|
298 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
|
|
299 |
|
300 |
# Send any remaining reasoning first
|
301 |
if remaining_reasoning:
|
|
|
302 |
reasoning_flush_payload = {
|
303 |
+
"id": f"chatcmpl-flush-{int(time.time())}",
|
304 |
"object": "chat.completion.chunk",
|
305 |
"created": int(time.time()),
|
306 |
"model": request.model,
|
307 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
308 |
}
|
309 |
+
yield f"data: {json.dumps(reasoning_flush_payload)}\n\n"
|
|
|
|
|
310 |
|
311 |
# Send any remaining content
|
312 |
if remaining_content:
|
|
|
313 |
content_flush_payload = {
|
314 |
+
"id": f"chatcmpl-flush-{int(time.time())}",
|
315 |
"object": "chat.completion.chunk",
|
316 |
"created": int(time.time()),
|
317 |
"model": request.model,
|
318 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
319 |
}
|
320 |
+
yield f"data: {json.dumps(content_flush_payload)}\n\n"
|
|
|
|
|
321 |
has_sent_content = True
|
322 |
|
323 |
# Always send a finish reason chunk
|
324 |
finish_payload = {
|
325 |
+
"id": f"chatcmpl-final-{int(time.time())}", # Kilo Code: Changed ID for clarity
|
326 |
"object": "chat.completion.chunk",
|
327 |
"created": int(time.time()),
|
328 |
"model": request.model,
|
329 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
330 |
}
|
331 |
+
yield f"data: {json.dumps(finish_payload)}\n\n"
|
|
|
|
|
332 |
|
333 |
+
yield "data: [DONE]\n\n"
|
|
|
|
|
334 |
|
335 |
except Exception as stream_error:
|
336 |
error_msg = str(stream_error)
|