Update app.py

app.py (CHANGED)

```diff
@@ -360,7 +360,7 @@ def handsome_chat_completions():
         headers=headers,
         json=data,
         stream=data.get("stream", False),
-        timeout=
+        timeout=1200
     )
 
     if response.status_code == 429:
```
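
The first hunk fills in the request timeout. Judging by the keyword arguments (`headers`, `json`, `stream`) and the later `response.iter_lines()` / `response.status_code` usage, the upstream call is made with `requests`; if so, note that `timeout=1200` does not cap the total duration of a streamed response. In `requests`, the timeout bounds the connect phase and each wait between received bytes separately. A minimal sketch under that assumption, with a placeholder URL and payload, using the tuple form to make the two bounds explicit:

```python
import requests

# Placeholder URL, key, and payload; only the timeout semantics matter here.
response = requests.post(
    "https://upstream.example/v1/chat/completions",
    headers={"Authorization": "Bearer sk-..."},
    json={"model": "deepseek-reasoner", "stream": True},
    stream=True,
    # 10 s to establish the connection, then up to 1200 s between chunks.
    # A bare number (timeout=1200), as in the diff, applies the same bound
    # to both phases; it never limits the stream's total lifetime.
    timeout=(10, 1200),
)
```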

````diff
@@ -465,133 +465,70 @@ def handsome_chat_completions():
                 content_type=response.headers['Content-Type']
             )
 
-        if model_name == "deepseek-reasoner-openwebui":
+        if model_name == "deepseek-reasoner-openwebui":
             first_chunk_time = None
             full_response_content = ""
             reasoning_content_accumulated = ""
             content_accumulated = ""
             first_reasoning_chunk = True
-
+            response_id = f"chatcmpl-{uuid.uuid4()}"
+            created_time = int(time.time())
             for chunk in response.iter_lines():
                 if chunk:
                     if first_chunk_time is None:
                         first_chunk_time = time.time()
-                    ...
+                    chunk_str = chunk.decode("utf-8")
+                    full_response_content += chunk_str
 
-                    ...
-                    line = line[5:].strip()
-                    if line == "[DONE]":
-                        continue
-                    try:
-                        response_json = json.loads(line)
-
-                        if (
-                            "usage" in response_json and
-                            "completion_tokens" in response_json["usage"]
-                        ):
-                            completion_tokens += response_json[
-                                "usage"
-                            ]["completion_tokens"]
-                        if (
-                            "usage" in response_json and
-                            "prompt_tokens" in response_json["usage"]
-                        ):
-                            prompt_tokens = response_json[
-                                "usage"
-                            ]["prompt_tokens"]
-
-                    except (
-                        KeyError,
-                        ValueError,
-                        IndexError
-                    ) as e:
-                        logging.error(
-                            f"Failed to parse a line of the streaming response as JSON: {e}, "
-                            f"line content: {line}"
-                        )
-
-            user_content = ""
-            messages = data.get("messages", [])
-            for message in messages:
-                if message["role"] == "user":
-                    if isinstance(message["content"], str):
-                        user_content += message["content"] + " "
-                    elif isinstance(message["content"], list):
-                        for item in message["content"]:
-                            if (
-                                isinstance(item, dict) and
-                                item.get("type") == "text"
-                            ):
-                                user_content += (
-                                    item.get("text", "") +
-                                    " "
-                                )
-
-            user_content = user_content.strip()
-
-            user_content_replaced = user_content.replace(
-                '\n', '\\n'
-            ).replace('\r', '\\n')
-            response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
-            response_content_replaced = response_content_replaced.replace(
-                '\n', '\\n'
-            ).replace('\r', '\\n')
-
-            logging.info(
-                f"Key used: {api_key}, "
-                f"prompt tokens: {prompt_tokens}, "
-                f"completion tokens: {completion_tokens}, "
-                f"time to first token: {first_token_time:.4f}s, "
-                f"total time: {total_time:.4f}s, "
-                f"model used: {model_name}, "
-                f"user content: {user_content_replaced}, "
-                f"response content: {response_content_replaced}"
-            )
+                    if chunk_str.startswith("data:"):
+                        try:
+                            chunk_json = json.loads(chunk_str[5:].strip())
+                            delta = chunk_json.get("choices", [{}])[0].get("delta", {})
+
+                            openai_chunk = {
+                                "id": response_id,
+                                "object": "chat.completion.chunk",
+                                "created": created_time,
+                                "model": model_name,
+                                "choices": [{
+                                    "index": 0,
+                                    "delta": {},
+                                    "finish_reason": None
+                                }]
+                            }
+
+                            if "reasoning_content" in delta:
+                                if first_reasoning_chunk:
+                                    openai_chunk["choices"][0]["delta"]["content"] = "<think>\n"
+                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
+                                    first_reasoning_chunk = False
+
+                                openai_chunk["choices"][0]["delta"]["content"] = delta["reasoning_content"]
+                                yield f"data: {json.dumps(openai_chunk)}\n\n"
+                                reasoning_content_accumulated += delta["reasoning_content"]
+
+                            if "content" in delta:
+                                if not first_reasoning_chunk:
+                                    openai_chunk["choices"][0]["delta"]["content"] = "\n</think>\n"
+                                    yield f"data: {json.dumps(openai_chunk)}\n\n"
+                                    first_reasoning_chunk = True
+
+                                openai_chunk["choices"][0]["delta"]["content"] = delta["content"]
+                                yield f"data: {json.dumps(openai_chunk)}\n\n"
+                                content_accumulated += delta["content"]
 
-
-
-            token_counts.append(prompt_tokens + completion_tokens)
+                        except (json.JSONDecodeError, KeyError) as e:
+                            logging.error(f"Error parsing chunk: {e}")
 
             yield "data: [DONE]\n\n"
-
+
             return Response(
                 stream_with_context(generate()),
-                ...
+                mimetype="text/event-stream",
+                headers={
+                    "X-Content-Type-Options": "nosniff",
+                    "Connection": "keep-alive"
+                }
             )
 
             first_chunk_time = None
````
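
The second hunk swaps the old per-request usage logging for an on-the-fly rewrite of the upstream stream: DeepSeek-style deltas carry chain-of-thought in a separate `reasoning_content` field, and the new code folds that into the visible `content` stream wrapped in `<think>`/`</think>` tags, re-emitting each piece as a standard `chat.completion.chunk`. A self-contained sketch of just that transformation, for reading alongside the diff (the `rewrite_delta` helper and its `state` dict are hypothetical names, not part of app.py):

```python
import json
import time
import uuid


def rewrite_delta(delta, state, model_name="deepseek-reasoner-openwebui"):
    """Yield OpenAI-style chunk dicts for one upstream delta."""
    def chunk(text):
        return {
            "id": state["id"],
            "object": "chat.completion.chunk",
            "created": state["created"],
            "model": model_name,
            "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}],
        }

    if "reasoning_content" in delta:
        if not state["in_reasoning"]:
            yield chunk("<think>\n")        # open the think block exactly once
            state["in_reasoning"] = True
        yield chunk(delta["reasoning_content"])
    if "content" in delta:
        if state["in_reasoning"]:
            yield chunk("\n</think>\n")     # close it before the first answer token
            state["in_reasoning"] = False
        yield chunk(delta["content"])


state = {"id": f"chatcmpl-{uuid.uuid4()}", "created": int(time.time()), "in_reasoning": False}
for d in ({"reasoning_content": "Let me check."}, {"content": "Hello!"}):
    for c in rewrite_delta(d, state):
        print(json.dumps(c))
```

Emitting the open/close markers as separate chunks mirrors the diff: `<think>\n` goes out once before the first reasoning token, and `\n</think>\n` once before the first answer token, so clients such as Open WebUI can render the reasoning as a collapsible block.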