Spaces:

bibibi12345
/

vertex

Building

App Files Community

bibibi12345 committed on Jun 13

Commit

eef2ebb

1 Parent(s): 0da76bb

tentative tool call support

Browse files

Files changed (3) hide show

app/api_helpers.py +0 -2
app/message_processing.py +0 -20
app/openai_handler.py +13 -50

app/api_helpers.py CHANGED Viewed

@@ -423,7 +423,6 @@ async def execute_gemini_call(
                 block_msg+=f" ({response_obj_call.prompt_feedback.block_reason_message})"
             raise ValueError(block_msg)
-        print(f"DEBUG: Raw Gemini response_obj_call before conversion: {response_obj_call}") # Kilo Code Added Log
         if not is_gemini_response_valid(response_obj_call):
             error_details = f"Invalid non-streaming Gemini response for model string '{model_to_call}'. "
             if hasattr(response_obj_call, 'candidates'):
@@ -446,5 +445,4 @@ async def execute_gemini_call(
             raise ValueError(error_details)
         openai_response_content = convert_to_openai_format(response_obj_call, request_obj.model)
-        print(f"DEBUG: OpenAI formatted response content before JSONResponse: {openai_response_content}") # Kilo Code Added Log
         return JSONResponse(content=openai_response_content)

                 block_msg+=f" ({response_obj_call.prompt_feedback.block_reason_message})"
             raise ValueError(block_msg)
         if not is_gemini_response_valid(response_obj_call):
             error_details = f"Invalid non-streaming Gemini response for model string '{model_to_call}'. "
             if hasattr(response_obj_call, 'candidates'):
             raise ValueError(error_details)
         openai_response_content = convert_to_openai_format(response_obj_call, request_obj.model)
         return JSONResponse(content=openai_response_content)

app/message_processing.py CHANGED Viewed

@@ -311,29 +311,20 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
     if gemini_candidate_content and hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
         for part_item in gemini_candidate_content.parts:
-            print(f"DEBUG: Parsing part_item: {part_item}") # Kilo Code Added Log
             if hasattr(part_item, 'function_call') and part_item.function_call is not None: # Kilo Code: Added 'is not None' check
-                print(f"DEBUG: part_item is a function_call, skipping for text parsing.") # Kilo Code Added Log
                 continue
             part_text = ""
             if hasattr(part_item, 'text') and part_item.text is not None:
                 part_text = str(part_item.text)
-            # Kilo Code Added Logs
             part_is_thought = hasattr(part_item, 'thought') and part_item.thought is True
-            print(f"DEBUG: part_text: '{part_text}', is_thought: {part_is_thought}")
             if part_is_thought:
                 reasoning_text_parts.append(part_text)
-                print(f"DEBUG: Appended to reasoning_text_parts. Current count: {len(reasoning_text_parts)}") # Kilo Code Added Log
             elif part_text: # Only add if it's not a function_call and has text
                 normal_text_parts.append(part_text)
-                print(f"DEBUG: Appended to normal_text_parts. Current count: {len(normal_text_parts)}") # Kilo Code Added Log
-            else:
-                print(f"DEBUG: part_text is empty or not appended. is_thought: {part_is_thought}") # Kilo Code Added Log
     elif candidate_part_text:
-        print(f"DEBUG: Using candidate_part_text: '{candidate_part_text}'") # Kilo Code Added Log
         normal_text_parts.append(candidate_part_text)
     elif gemini_candidate_content and hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
         normal_text_parts.append(str(gemini_candidate_content.text))
@@ -371,12 +362,6 @@ def process_gemini_response_to_openai_dict(gemini_response_obj: Any, request_mod
                 for part in candidate.content.parts:
                     if hasattr(part, 'function_call') and part.function_call is not None: # Kilo Code: Added 'is not None' check
                         fc = part.function_call
-                        # Kilo Code Added Logs
-                        print(f"DEBUG: Processing part with function_call. Part: {part}")
-                        print(f"DEBUG: FunctionCall object (fc): {fc}")
-                        if fc:
-                            print(f"DEBUG: fc.name: {getattr(fc, 'name', 'Name attribute does not exist or is None')}")
-                        # End Kilo Code Added Logs
                         tool_call_id = f"call_{base_id}_{i}_{fc.name.replace(' ', '_')}_{int(time.time()*10000 + random.randint(0,9999))}"
                         if "tool_calls" not in message_payload:
@@ -400,9 +385,7 @@ def process_gemini_response_to_openai_dict(gemini_response_obj: Any, request_mod
                     reasoning_str = deobfuscate_text(reasoning_str)
                     normal_content_str = deobfuscate_text(normal_content_str)
-                print(f"DEBUG_ASSIGN: normal_content_str before assignment to message_payload: '{normal_content_str}'") # Kilo Code Added Log
                 message_payload["content"] = normal_content_str
-                print(f"DEBUG_ASSIGN: message_payload['content'] after assignment: '{message_payload['content']}'") # Kilo Code Added Log
                 if reasoning_str:
                     message_payload['reasoning_content'] = reasoning_str
@@ -494,12 +477,10 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
                     break
         if not function_call_detected_in_chunk:
-            print(f"DEBUG_STREAM: Raw candidate list in chunk for text processing: {candidate}") # Kilo Code Added Log (Note: 'candidate' here is chunk.candidates)
             if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
                 reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
             else:
                 reasoning_text, normal_text = "", "" # Default to empty if no candidates
-            print(f"DEBUG_STREAM: Parsed from chunk - reasoning_text: '{reasoning_text}', normal_text: '{normal_text}'") # Kilo Code Added Log
             if is_encrypt_full:
                 reasoning_text = deobfuscate_text(reasoning_text)
                 normal_text = deobfuscate_text(normal_text)
@@ -516,7 +497,6 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
         # and it's not a terminal chunk, we still send a delta with empty content.
         delta_payload['content'] = ""
-    print(f"DEBUG_STREAM: Final delta_payload for chunk: {delta_payload}") # Kilo Code Added Log
     chunk_data = {
         "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
         "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": openai_finish_reason}]

     if gemini_candidate_content and hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
         for part_item in gemini_candidate_content.parts:
             if hasattr(part_item, 'function_call') and part_item.function_call is not None: # Kilo Code: Added 'is not None' check
                 continue
             part_text = ""
             if hasattr(part_item, 'text') and part_item.text is not None:
                 part_text = str(part_item.text)
             part_is_thought = hasattr(part_item, 'thought') and part_item.thought is True
             if part_is_thought:
                 reasoning_text_parts.append(part_text)
             elif part_text: # Only add if it's not a function_call and has text
                 normal_text_parts.append(part_text)
     elif candidate_part_text:
         normal_text_parts.append(candidate_part_text)
     elif gemini_candidate_content and hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
         normal_text_parts.append(str(gemini_candidate_content.text))
                 for part in candidate.content.parts:
                     if hasattr(part, 'function_call') and part.function_call is not None: # Kilo Code: Added 'is not None' check
                         fc = part.function_call
                         tool_call_id = f"call_{base_id}_{i}_{fc.name.replace(' ', '_')}_{int(time.time()*10000 + random.randint(0,9999))}"
                         if "tool_calls" not in message_payload:
                     reasoning_str = deobfuscate_text(reasoning_str)
                     normal_content_str = deobfuscate_text(normal_content_str)
                 message_payload["content"] = normal_content_str
                 if reasoning_str:
                     message_payload['reasoning_content'] = reasoning_str
                     break
         if not function_call_detected_in_chunk:
             if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
                 reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
             else:
                 reasoning_text, normal_text = "", "" # Default to empty if no candidates
             if is_encrypt_full:
                 reasoning_text = deobfuscate_text(reasoning_text)
                 normal_text = deobfuscate_text(normal_text)
         # and it's not a terminal chunk, we still send a delta with empty content.
         delta_payload['content'] = ""
     chunk_data = {
         "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
         "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": openai_finish_reason}]

app/openai_handler.py CHANGED Viewed

@@ -233,19 +233,11 @@ class OpenAIDirectHandler:
                                 del delta['extra_content']
                             content = delta.get('content', '')
-                            print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Original delta content: '{content}'") # Kilo Code Added Log
                             if content:
-                                # print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
                                 # Use the processor to extract reasoning
                                 processed_content, current_reasoning = reasoning_processor.process_chunk(content)
-                                print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Processed by StreamingProcessor: processed_content='{processed_content}', current_reasoning='{current_reasoning}'") # Kilo Code Added Log
-                                # Debug logging for processing results
-                                # if processed_content or current_reasoning:
-                                #     print(f"DEBUG: Chunk {chunk_count} - Processed content: '{processed_content}', Reasoning: '{current_reasoning[:50]}...' if len(current_reasoning) > 50 else '{current_reasoning}'")
                                 # Send chunks for both reasoning and content as they arrive
-                                # Kilo Code: Revised payload construction
                                 original_choice = chunk_as_dict['choices'][0]
                                 original_finish_reason = original_choice.get('finish_reason')
                                 original_usage = original_choice.get('usage')
@@ -257,15 +249,10 @@ class OpenAIDirectHandler:
                                         "created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
                                         "choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
                                     }
-                                    yielded_data_str = f"data: {json.dumps(reasoning_payload)}\n\n"
-                                    print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding REASONING: {yielded_data_str.strip()}")
-                                    yield yielded_data_str
                                 if processed_content:
                                     content_delta = {'content': processed_content}
-                                    # Determine if this processed_content chunk should carry the original finish_reason and usage.
-                                    # It should if the reasoning processor is NOT inside a tag after this,
-                                    # meaning this processed_content is the final part of any tagged content from original_content_from_delta.
                                     finish_reason_for_this_content_delta = None
                                     usage_for_this_content_delta = None
@@ -282,25 +269,13 @@ class OpenAIDirectHandler:
                                     if usage_for_this_content_delta:
                                         content_payload['choices'][0]['usage'] = usage_for_this_content_delta
-                                    yielded_data_str = f"data: {json.dumps(content_payload)}\n\n"
-                                    print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding CONTENT: {yielded_data_str.strip()}")
-                                    yield yielded_data_str
                                     has_sent_content = True
-                                # If neither reasoning nor processed_content was generated from a non-empty original content,
-                                # it implies the original content might have been just a tag or something consumed entirely.
-                                # In this case, do nothing more for this path; the original chunk's other properties (like finish_reason if content was empty) are handled below.
-                            elif original_finish_reason: # Original delta had no content, but had a finish_reason
-                                # This case handles chunks that are purely for signaling stream end or other non-content states.
-                                yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n" # Yield original chunk as is
-                                print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta no content, but has finish_reason): {yielded_data_str.strip()}")
-                                yield yielded_data_str
-                            # If original delta had no content and no finish_reason, it's an empty delta, yield as is.
-                            elif not content and not original_finish_reason : # Kilo Code: Added this condition
-                                yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n"
-                                print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta empty): {yielded_data_str.strip()}")
-                                yield yielded_data_str
                     else:
                         # Yield chunks without choices too (they might contain metadata)
                         yield f"data: {json.dumps(chunk_as_dict)}\n\n"
@@ -319,55 +294,43 @@ class OpenAIDirectHandler:
             # print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
             #       f"inside_tag: {reasoning_processor.inside_tag}, "
             #       f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
             # Flush any remaining buffered content
             remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
-            print(f"DEBUG_OPENAI_STREAM: Flushed from StreamingProcessor: remaining_content='{remaining_content}', remaining_reasoning='{remaining_reasoning}'") # Kilo Code Added Log
             # Send any remaining reasoning first
             if remaining_reasoning:
-                # print(f"DEBUG: Flushing remaining reasoning: '{remaining_reasoning[:50]}...' if len(remaining_reasoning) > 50 else '{remaining_reasoning}'")
                 reasoning_flush_payload = {
-                    "id": f"chatcmpl-flush-{int(time.time())}", # Kilo Code: Changed ID for clarity
                     "object": "chat.completion.chunk",
                     "created": int(time.time()),
                     "model": request.model,
                     "choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
                 }
-                yielded_data_str = f"data: {json.dumps(reasoning_flush_payload)}\n\n"
-                print(f"DEBUG_OPENAI_STREAM: Yielding from flush (reasoning): {yielded_data_str.strip()}") # Kilo Code Added Log
-                yield yielded_data_str
             # Send any remaining content
             if remaining_content:
-                # print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
                 content_flush_payload = {
-                    "id": f"chatcmpl-flush-{int(time.time())}", # Kilo Code: Changed ID for clarity
                     "object": "chat.completion.chunk",
                     "created": int(time.time()),
                     "model": request.model,
                     "choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
                 }
-                yielded_data_str = f"data: {json.dumps(content_flush_payload)}\n\n"
-                print(f"DEBUG_OPENAI_STREAM: Yielding from flush (content): {yielded_data_str.strip()}") # Kilo Code Added Log
-                yield yielded_data_str
                 has_sent_content = True
             # Always send a finish reason chunk
             finish_payload = {
-                "id": f"chatcmpl-{int(time.time())}",
                 "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": request.model,
                 "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
             }
-            yielded_data_str = f"data: {json.dumps(finish_payload)}\n\n"
-            print(f"DEBUG_OPENAI_STREAM: Yielding finish chunk: {yielded_data_str.strip()}") # Kilo Code Added Log
-            yield yielded_data_str
-            yielded_data_str = "data: [DONE]\n\n"
-            print(f"DEBUG_OPENAI_STREAM: Yielding DONE: {yielded_data_str.strip()}") # Kilo Code Added Log
-            yield yielded_data_str
         except Exception as stream_error:
             error_msg = str(stream_error)

                                 del delta['extra_content']
                             content = delta.get('content', '')
                             if content:
                                 # Use the processor to extract reasoning
                                 processed_content, current_reasoning = reasoning_processor.process_chunk(content)
                                 # Send chunks for both reasoning and content as they arrive
                                 original_choice = chunk_as_dict['choices'][0]
                                 original_finish_reason = original_choice.get('finish_reason')
                                 original_usage = original_choice.get('usage')
                                         "created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
                                         "choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
                                     }
+                                    yield f"data: {json.dumps(reasoning_payload)}\n\n"
                                 if processed_content:
                                     content_delta = {'content': processed_content}
                                     finish_reason_for_this_content_delta = None
                                     usage_for_this_content_delta = None
                                     if usage_for_this_content_delta:
                                         content_payload['choices'][0]['usage'] = usage_for_this_content_delta
+                                    yield f"data: {json.dumps(content_payload)}\n\n"
                                     has_sent_content = True
+                            elif original_choice.get('finish_reason'): # Check original_choice for finish_reason
+                                yield f"data: {json.dumps(chunk_as_dict)}\n\n"
+                            elif not content and not original_choice.get('finish_reason') :
+                                yield f"data: {json.dumps(chunk_as_dict)}\n\n"
                     else:
                         # Yield chunks without choices too (they might contain metadata)
                         yield f"data: {json.dumps(chunk_as_dict)}\n\n"
             # print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
             #       f"inside_tag: {reasoning_processor.inside_tag}, "
             #       f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
             # Flush any remaining buffered content
             remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
             # Send any remaining reasoning first
             if remaining_reasoning:
                 reasoning_flush_payload = {
+                    "id": f"chatcmpl-flush-{int(time.time())}",
                     "object": "chat.completion.chunk",
                     "created": int(time.time()),
                     "model": request.model,
                     "choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
                 }
+                yield f"data: {json.dumps(reasoning_flush_payload)}\n\n"
             # Send any remaining content
             if remaining_content:
                 content_flush_payload = {
+                    "id": f"chatcmpl-flush-{int(time.time())}",
                     "object": "chat.completion.chunk",
                     "created": int(time.time()),
                     "model": request.model,
                     "choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
                 }
+                yield f"data: {json.dumps(content_flush_payload)}\n\n"
                 has_sent_content = True
             # Always send a finish reason chunk
             finish_payload = {
+                "id": f"chatcmpl-final-{int(time.time())}", # Kilo Code: Changed ID for clarity
                 "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": request.model,
                 "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
             }
+            yield f"data: {json.dumps(finish_payload)}\n\n"
+            yield "data: [DONE]\n\n"
         except Exception as stream_error:
             error_msg = str(stream_error)