ds

Running

App Files Community

yangtb24 committed on Jan 20

Commit

be80fad

verified ·

1 Parent(s): fb1ddc7

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -6

app.py CHANGED Viewed

@@ -433,6 +433,7 @@ def handsome_chat_completions():
                 full_response_content = ""
                 reasoning_content_accumulated = ""  # Accumulate reasoning content
                 content_accumulated = ""  # Accumulate regular content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
@@ -446,11 +447,13 @@ def handsome_chat_completions():
                                 delta = chunk_json["choices"][0].get("delta", {})
                                 if delta.get("reasoning_content") is not None:
-                                    reasoning_content_accumulated += delta.get("reasoning_content", "")
-                                    formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
-                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': formatted_reasoning}, 'index': 0, 'finish_reason': None}]})}\n\n"
-                                    reasoning_content_accumulated = ""
                                 if delta.get("content") is not None:
                                     content_accumulated += delta.get("content", "")
                                     yield f"data: {json.dumps({'choices': [{'delta': {'content': content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
@@ -460,6 +463,10 @@ def handsome_chat_completions():
                             logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {chunk.decode('utf-8')}")
                             continue
                 end_time = time.time()
                 first_token_time = (
                     first_chunk_time - start_time
@@ -524,7 +531,7 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
                 response_content_replaced = response_content_replaced.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')

                 full_response_content = ""
                 reasoning_content_accumulated = ""  # Accumulate reasoning content
                 content_accumulated = ""  # Accumulate regular content
+                is_first_reasoning = True
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                                 delta = chunk_json["choices"][0].get("delta", {})
                                 if delta.get("reasoning_content") is not None:
+                                    if is_first_reasoning:
+                                        reasoning_content_accumulated += f"```Thinking\n{delta.get('reasoning_content', '')}"
+                                        is_first_reasoning = False
+                                    else:
+                                         reasoning_content_accumulated += delta.get("reasoning_content", "")
                                 if delta.get("content") is not None:
                                     content_accumulated += delta.get("content", "")
                                     yield f"data: {json.dumps({'choices': [{'delta': {'content': content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
                             logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {chunk.decode('utf-8')}")
                             continue
+                if reasoning_content_accumulated:
+                    reasoning_content_accumulated += "\n```"
+                    yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
                 end_time = time.time()
                 first_token_time = (
                     first_chunk_time - start_time
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = (f"{reasoning_content_accumulated}\n" if reasoning_content_accumulated else "") + content_accumulated
                 response_content_replaced = response_content_replaced.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')