ds

Running

App Files Files Community

yangtb24 committed on Feb 4

Commit

03b936c

verified ·

1 Parent(s): 54ab1ea

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -27

app.py CHANGED Viewed

@@ -468,12 +468,38 @@ def handsome_chat_completions():
                 if model_name == "deepseek-reasoner-openwebui":
                     first_chunk_time = None
                     full_response_content = ""
-                    for chunk in response.iter_content(chunk_size=2048):
                         if chunk:
                             if first_chunk_time is None:
                                 first_chunk_time = time.time()
                             full_response_content += chunk.decode("utf-8")
-                            yield chunk
                     end_time = time.time()
                     first_token_time = (
@@ -484,7 +510,6 @@ def handsome_chat_completions():
                     prompt_tokens = 0
                     completion_tokens = 0
-                    response_content = ""
                     for line in full_response_content.splitlines():
                         if line.startswith("data:"):
                             line = line[5:].strip()
@@ -497,22 +522,9 @@ def handsome_chat_completions():
                                     "usage" in response_json and
                                     "completion_tokens" in response_json["usage"]
                                 ):
-                                    completion_tokens = response_json[
                                         "usage"
                                     ]["completion_tokens"]
-                                if (
-                                    "choices" in response_json and
-                                    len(response_json["choices"]) > 0 and
-                                    "delta" in response_json["choices"][0] and
-                                    "content" in response_json[
-                                        "choices"
-                                    ][0]["delta"]
-                                ):
-                                    response_content += response_json[
-                                        "choices"
-                                    ][0]["delta"]["content"]
                                 if (
                                     "usage" in response_json and
                                     "prompt_tokens" in response_json["usage"]
@@ -531,12 +543,30 @@ def handsome_chat_completions():
                                     f"行内容: {line}"
                                 )
-                    user_content = extract_user_content(data.get("messages", []))
                     user_content_replaced = user_content.replace(
                         '\n', '\\n'
                     ).replace('\r', '\\n')
-                    response_content_replaced = response_content.replace(
                         '\n', '\\n'
                     ).replace('\r', '\\n')
@@ -553,14 +583,14 @@ def handsome_chat_completions():
                     with data_lock:
                         request_timestamps.append(time.time())
-                        token_counts.append(prompt_tokens+completion_tokens)
-                        request_timestamps_day.append(time.time())
-                        token_counts_day.append(prompt_tokens+completion_tokens)
-                    return Response(
-                        stream_with_context(generate()),
-                        content_type=response.headers['Content-Type']
-                    )
                 first_chunk_time = None
                 full_response_content = ""
@@ -596,7 +626,7 @@ def handsome_chat_completions():
                                             yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
                                 except (KeyError, ValueError, json.JSONDecodeError) as e:
-                                    logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
                                     continue
                 end_time = time.time()

                 if model_name == "deepseek-reasoner-openwebui":
                     first_chunk_time = None
                     full_response_content = ""
+                    reasoning_content_accumulated = ""
+                    content_accumulated = ""
+                    first_reasoning_chunk = True
+                    for chunk in response.iter_lines():
                         if chunk:
                             if first_chunk_time is None:
                                 first_chunk_time = time.time()
                             full_response_content += chunk.decode("utf-8")
+                            for line in chunk.decode("utf-8").splitlines():
+                                if line.startswith("data:"):
+                                    try:
+                                        chunk_json = json.loads(line.lstrip("data: ").strip())
+                                        if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
+                                            delta = chunk_json["choices"][0].get("delta", {})
+                                            if delta.get("reasoning_content") is not None:
+                                                reasoning_chunk = delta["reasoning_content"]
+                                                if first_reasoning_chunk:
+                                                    reasoning_chunk = f"<think>\n{reasoning_chunk}"
+                                                    first_reasoning_chunk = False
+                                                yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
+                                            if delta.get("content") is not None:
+                                                if not first_reasoning_chunk:
+                                                    yield f"data: {json.dumps({'choices': [{'delta': {'content': f'\n<\think>\n'}, 'index': 0}]})}\n\n"
+                                                    first_reasoning_chunk = True
+                                                yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
+                                    except (KeyError, ValueError, json.JSONDecodeError) as e:
+                                        continue
                     end_time = time.time()
                     first_token_time = (
                     prompt_tokens = 0
                     completion_tokens = 0
                     for line in full_response_content.splitlines():
                         if line.startswith("data:"):
                             line = line[5:].strip()
                                     "usage" in response_json and
                                     "completion_tokens" in response_json["usage"]
                                 ):
+                                    completion_tokens += response_json[
                                         "usage"
                                     ]["completion_tokens"]
                                 if (
                                     "usage" in response_json and
                                     "prompt_tokens" in response_json["usage"]
                                     f"行内容: {line}"
                                 )
+                    user_content = ""
+                    messages = data.get("messages", [])
+                    for message in messages:
+                        if message["role"] == "user":
+                            if isinstance(message["content"], str):
+                                user_content += message["content"] + " "
+                            elif isinstance(message["content"], list):
+                                for item in message["content"]:
+                                    if (
+                                        isinstance(item, dict) and
+                                        item.get("type") == "text"
+                                    ):
+                                        user_content += (
+                                            item.get("text", "") +
+                                            " "
+                                        )
+                    user_content = user_content.strip()
                     user_content_replaced = user_content.replace(
                         '\n', '\\n'
                     ).replace('\r', '\\n')
+                    response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
+                    response_content_replaced = response_content_replaced.replace(
                         '\n', '\\n'
                     ).replace('\r', '\\n')
                     with data_lock:
                         request_timestamps.append(time.time())
+                        token_counts.append(prompt_tokens + completion_tokens)
+                    yield "data: [DONE]\n\n"
+                return Response(
+                    stream_with_context(generate()),
+                    content_type="text/event-stream"
+                )
                 first_chunk_time = None
                 full_response_content = ""
                                             yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
                                 except (KeyError, ValueError, json.JSONDecodeError) as e:
+                                    # logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
                                     continue
                 end_time = time.time()