Spaces:

AIMaster7
/

Mono

Running

App Files Community

AIMaster7 committed on Jul 4

Commit

1fc362c

verified ·

1 Parent(s): c8a5a1f

Update main.py

Browse files

Files changed (1) hide show

main.py +14 -29

main.py CHANGED Viewed

@@ -123,7 +123,7 @@ Using tools is recommended.
                             if line.startswith("0:"):
                                 try:
                                     content_piece = json.loads(line[2:])
-                                    # print(content_piece)
                                     # Buffer the first few chunks
                                     if len(chunks_buffer) < max_initial_chunks:
                                         chunks_buffer.append(content_piece)
@@ -134,19 +134,6 @@ Using tools is recommended.
                                         if "<tool_call>" in full_buffer:
                                             print("Tool call detected")
                                             is_tool_call = True
-                                        else:
-                                            # No tool call, send buffered chunks as regular content
-                                            delta = {"content": full_buffer, "tool_calls": None}
-                                            if is_first_chunk:
-                                                delta["role"] = "assistant"
-                                                is_first_chunk = False
-                                            chunk_data = {
-                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
-                                                "model": model_id,
-                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                "usage": None
-                                            }
-                                            yield f"data: {json.dumps(chunk_data)}\n\n"
                                     # Process the current chunk
                                     if is_tool_call:
@@ -181,11 +168,22 @@ Using tools is recommended.
                                         else:
                                             continue
                                     else:
                                         # Regular content
-                                        delta = {"content": content_piece, "tool_calls": None}
                                         if is_first_chunk:
                                             delta["role"] = "assistant"
                                             is_first_chunk = False
                                         chunk_data = {
                                             "id": chat_id, "object": "chat.completion.chunk", "created": created,
                                             "model": model_id,
@@ -199,20 +197,7 @@ Using tools is recommended.
                                     usage_info = json.loads(line[2:]).get("usage")
                                 except (json.JSONDecodeError, AttributeError): pass
                                 break
-                        # Handle any remaining buffer content
-                        if chunks_buffer and not is_tool_call:
-                            full_buffer = ''.join(chunks_buffer)
-                            delta = {"content": full_buffer, "tool_calls": None}
-                            if is_first_chunk:
-                                delta["role"] = "assistant"
-                                is_first_chunk = False
-                            chunk_data = {
-                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
-                                "model": model_id,
-                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                "usage": None
-                            }
-                            yield f"data: {json.dumps(chunk_data)}\n\n"
                         final_usage = None
                         if usage_info:
                             prompt_tokens = usage_info.get("promptTokens", 0)

                             if line.startswith("0:"):
                                 try:
                                     content_piece = json.loads(line[2:])
+                                    print(content_piece)
                                     # Buffer the first few chunks
                                     if len(chunks_buffer) < max_initial_chunks:
                                         chunks_buffer.append(content_piece)
                                         if "<tool_call>" in full_buffer:
                                             print("Tool call detected")
                                             is_tool_call = True
                                     # Process the current chunk
                                     if is_tool_call:
                                         else:
                                             continue
                                     else:
                                         # Regular content
                                         if is_first_chunk:
+                                            delta = {"content": "".join(chunks_buffer), "tool_calls": None}
                                             delta["role"] = "assistant"
                                             is_first_chunk = False
+                                            chunk_data = {
+                                            "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                            "model": model_id,
+                                            "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                            "usage": None
+                                            }
+                                            yield f"data: {json.dumps(chunk_data)}\n\n"
+                                        delta = {"content": content_piece, "tool_calls": None}
                                         chunk_data = {
                                             "id": chat_id, "object": "chat.completion.chunk", "created": created,
                                             "model": model_id,
                                     usage_info = json.loads(line[2:]).get("usage")
                                 except (json.JSONDecodeError, AttributeError): pass
                                 break
                         final_usage = None
                         if usage_info:
                             prompt_tokens = usage_info.get("promptTokens", 0)