ds

Running

App Files Files Community

yangtb24 committed on Jan 20

Commit

c861b0f

verified ·

1 Parent(s): 868e37f

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -27

app.py CHANGED Viewed

@@ -433,13 +433,30 @@ def handsome_chat_completions():
                 full_response_content = ""
                 reasoning_content_accumulated = ""  # Accumulate reasoning content
                 content_accumulated = ""  # Accumulate regular content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        yield chunk
                 end_time = time.time()
                 first_token_time = (
@@ -465,20 +482,6 @@ def handsome_chat_completions():
                                 completion_tokens += response_json[
                                     "usage"
                                 ]["completion_tokens"]
-                            # Special handling for deepseek-reasoner in streaming mode
-                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                                delta = response_json["choices"][0].get("delta", {})
-                                if "reasoning_content" in delta:
-                                    reasoning_content_accumulated += delta["reasoning_content"]
-                                if "content" in delta:
-                                    content_accumulated += delta["content"]
-                            elif "choices" in response_json and len(response_json["choices"]) > 0:
-                                # Handle other models normally
-                                delta = response_json["choices"][0].get("delta", {})
-                                if "content" in delta:
-                                    content_accumulated += delta["content"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
@@ -497,13 +500,6 @@ def handsome_chat_completions():
                                 f"行内容: {line}"
                             )
-                # Format the accumulated reasoning content after processing all chunks
-                if model_name == "deepseek-reasoner":
-                    formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
-                    response_content = formatted_reasoning + "\n" + content_accumulated
-                else:
-                    response_content = content_accumulated
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
@@ -526,7 +522,8 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = response_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
@@ -544,10 +541,8 @@ def handsome_chat_completions():
                 with data_lock:
                     request_timestamps.append(time.time())
                     token_counts.append(prompt_tokens + completion_tokens)
-                yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
-                yield "data: [DONE]\n\n"
             return Response(
                 stream_with_context(generate()),

                 full_response_content = ""
                 reasoning_content_accumulated = ""  # Accumulate reasoning content
                 content_accumulated = ""  # Accumulate regular content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        try:
+                            chunk_json = json.loads(chunk.decode("utf-8").lstrip("data: ").strip())
+                            if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
+                                delta = chunk_json["choices"][0].get("delta", {})
+                                if "reasoning_content" in delta:
+                                    reasoning_content_accumulated += delta["reasoning_content"]
+                                    formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
+                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': formatted_reasoning}, 'index': 0, 'finish_reason': None}]})}\n\n"
+                                    reasoning_content_accumulated = ""
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
+                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
+                                    content_accumulated = ""
+                        except (KeyError, ValueError, json.JSONDecodeError) as e:
+                            logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {chunk.decode('utf-8')}")
+                            continue
                 end_time = time.time()
                 first_token_time = (
                                 completion_tokens += response_json[
                                     "usage"
                                 ]["completion_tokens"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
                                 f"行内容: {line}"
                             )
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
+                response_content_replaced = response_content_replaced.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 with data_lock:
                     request_timestamps.append(time.time())
                     token_counts.append(prompt_tokens + completion_tokens)
+                yield "data: [DONE]\n\n"
             return Response(
                 stream_with_context(generate()),