Update main.py
Browse files
main.py
CHANGED
@@ -123,7 +123,7 @@ Using tools is recommended.
|
|
123 |
if line.startswith("0:"):
|
124 |
try:
|
125 |
content_piece = json.loads(line[2:])
|
126 |
-
|
127 |
# Buffer the first few chunks
|
128 |
if len(chunks_buffer) < max_initial_chunks:
|
129 |
chunks_buffer.append(content_piece)
|
@@ -134,19 +134,6 @@ Using tools is recommended.
|
|
134 |
if "<tool_call>" in full_buffer:
|
135 |
print("Tool call detected")
|
136 |
is_tool_call = True
|
137 |
-
else:
|
138 |
-
# No tool call, send buffered chunks as regular content
|
139 |
-
delta = {"content": full_buffer, "tool_calls": None}
|
140 |
-
if is_first_chunk:
|
141 |
-
delta["role"] = "assistant"
|
142 |
-
is_first_chunk = False
|
143 |
-
chunk_data = {
|
144 |
-
"id": chat_id, "object": "chat.completion.chunk", "created": created,
|
145 |
-
"model": model_id,
|
146 |
-
"choices": [{"index": 0, "delta": delta, "finish_reason": None}],
|
147 |
-
"usage": None
|
148 |
-
}
|
149 |
-
yield f"data: {json.dumps(chunk_data)}\n\n"
|
150 |
|
151 |
# Process the current chunk
|
152 |
if is_tool_call:
|
@@ -181,11 +168,22 @@ Using tools is recommended.
|
|
181 |
else:
|
182 |
continue
|
183 |
else:
|
|
|
184 |
# Regular content
|
185 |
-
delta = {"content": content_piece, "tool_calls": None}
|
186 |
if is_first_chunk:
|
|
|
187 |
delta["role"] = "assistant"
|
188 |
is_first_chunk = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
chunk_data = {
|
190 |
"id": chat_id, "object": "chat.completion.chunk", "created": created,
|
191 |
"model": model_id,
|
@@ -199,20 +197,7 @@ Using tools is recommended.
|
|
199 |
usage_info = json.loads(line[2:]).get("usage")
|
200 |
except (json.JSONDecodeError, AttributeError): pass
|
201 |
break
|
202 |
-
|
203 |
-
if chunks_buffer and not is_tool_call:
|
204 |
-
full_buffer = ''.join(chunks_buffer)
|
205 |
-
delta = {"content": full_buffer, "tool_calls": None}
|
206 |
-
if is_first_chunk:
|
207 |
-
delta["role"] = "assistant"
|
208 |
-
is_first_chunk = False
|
209 |
-
chunk_data = {
|
210 |
-
"id": chat_id, "object": "chat.completion.chunk", "created": created,
|
211 |
-
"model": model_id,
|
212 |
-
"choices": [{"index": 0, "delta": delta, "finish_reason": None}],
|
213 |
-
"usage": None
|
214 |
-
}
|
215 |
-
yield f"data: {json.dumps(chunk_data)}\n\n"
|
216 |
final_usage = None
|
217 |
if usage_info:
|
218 |
prompt_tokens = usage_info.get("promptTokens", 0)
|
|
|
123 |
if line.startswith("0:"):
|
124 |
try:
|
125 |
content_piece = json.loads(line[2:])
|
126 |
+
print(content_piece)
|
127 |
# Buffer the first few chunks
|
128 |
if len(chunks_buffer) < max_initial_chunks:
|
129 |
chunks_buffer.append(content_piece)
|
|
|
134 |
if "<tool_call>" in full_buffer:
|
135 |
print("Tool call detected")
|
136 |
is_tool_call = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
# Process the current chunk
|
139 |
if is_tool_call:
|
|
|
168 |
else:
|
169 |
continue
|
170 |
else:
|
171 |
+
|
172 |
# Regular content
|
|
|
173 |
if is_first_chunk:
|
174 |
+
delta = {"content": "".join(chunks_buffer), "tool_calls": None}
|
175 |
delta["role"] = "assistant"
|
176 |
is_first_chunk = False
|
177 |
+
chunk_data = {
|
178 |
+
"id": chat_id, "object": "chat.completion.chunk", "created": created,
|
179 |
+
"model": model_id,
|
180 |
+
"choices": [{"index": 0, "delta": delta, "finish_reason": None}],
|
181 |
+
"usage": None
|
182 |
+
}
|
183 |
+
yield f"data: {json.dumps(chunk_data)}\n\n"
|
184 |
+
|
185 |
+
delta = {"content": content_piece, "tool_calls": None}
|
186 |
+
|
187 |
chunk_data = {
|
188 |
"id": chat_id, "object": "chat.completion.chunk", "created": created,
|
189 |
"model": model_id,
|
|
|
197 |
usage_info = json.loads(line[2:]).get("usage")
|
198 |
except (json.JSONDecodeError, AttributeError): pass
|
199 |
break
|
200 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
final_usage = None
|
202 |
if usage_info:
|
203 |
prompt_tokens = usage_info.get("promptTokens", 0)
|