yangtb24 committed on
Commit
939f93f
·
verified ·
1 Parent(s): 42bc360

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -113
app.py CHANGED
@@ -360,7 +360,7 @@ def handsome_chat_completions():
360
  headers=headers,
361
  json=data,
362
  stream=data.get("stream", False),
363
- timeout=120
364
  )
365
 
366
  if response.status_code == 429:
@@ -465,133 +465,70 @@ def handsome_chat_completions():
465
  content_type=response.headers['Content-Type']
466
  )
467
 
468
- if model_name == "deepseek-reasoner-openwebui":
469
  first_chunk_time = None
470
  full_response_content = ""
471
  reasoning_content_accumulated = ""
472
  content_accumulated = ""
473
  first_reasoning_chunk = True
474
-
 
475
  for chunk in response.iter_lines():
476
  if chunk:
477
  if first_chunk_time is None:
478
  first_chunk_time = time.time()
479
- full_response_content += chunk.decode("utf-8")
 
480
 
481
- for line in chunk.decode("utf-8").splitlines():
482
- if line.startswith("data:"):
483
- try:
484
- chunk_json = json.loads(line.lstrip("data: ").strip())
485
- if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
486
- delta = chunk_json["choices"][0].get("delta", {})
487
-
488
- if delta.get("reasoning_content") is not None:
489
- reasoning_chunk = delta["reasoning_content"]
490
- if first_reasoning_chunk:
491
- think_chunk = f"<think>\n"
492
- yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
493
- first_reasoning_chunk = False
494
- yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
495
-
496
- if delta.get("content") is not None:
497
- if not first_reasoning_chunk:
498
- reasoning_chunk = f"\n</think>\n"
499
- yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
500
- first_reasoning_chunk = True
501
- yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
502
-
503
- except (KeyError, ValueError, json.JSONDecodeError) as e:
504
- continue
505
-
506
- end_time = time.time()
507
- first_token_time = (
508
- first_chunk_time - start_time
509
- if first_chunk_time else 0
510
- )
511
- total_time = end_time - start_time
512
-
513
- prompt_tokens = 0
514
- completion_tokens = 0
515
- for line in full_response_content.splitlines():
516
- if line.startswith("data:"):
517
- line = line[5:].strip()
518
- if line == "[DONE]":
519
- continue
520
- try:
521
- response_json = json.loads(line)
522
-
523
- if (
524
- "usage" in response_json and
525
- "completion_tokens" in response_json["usage"]
526
- ):
527
- completion_tokens += response_json[
528
- "usage"
529
- ]["completion_tokens"]
530
- if (
531
- "usage" in response_json and
532
- "prompt_tokens" in response_json["usage"]
533
- ):
534
- prompt_tokens = response_json[
535
- "usage"
536
- ]["prompt_tokens"]
537
-
538
- except (
539
- KeyError,
540
- ValueError,
541
- IndexError
542
- ) as e:
543
- logging.error(
544
- f"解析流式响应单行 JSON 失败: {e}, "
545
- f"行内容: {line}"
546
- )
547
-
548
- user_content = ""
549
- messages = data.get("messages", [])
550
- for message in messages:
551
- if message["role"] == "user":
552
- if isinstance(message["content"], str):
553
- user_content += message["content"] + " "
554
- elif isinstance(message["content"], list):
555
- for item in message["content"]:
556
- if (
557
- isinstance(item, dict) and
558
- item.get("type") == "text"
559
- ):
560
- user_content += (
561
- item.get("text", "") +
562
- " "
563
- )
564
-
565
- user_content = user_content.strip()
566
-
567
- user_content_replaced = user_content.replace(
568
- '\n', '\\n'
569
- ).replace('\r', '\\n')
570
- response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
571
- response_content_replaced = response_content_replaced.replace(
572
- '\n', '\\n'
573
- ).replace('\r', '\\n')
574
-
575
- logging.info(
576
- f"使用的key: {api_key}, "
577
- f"提示token: {prompt_tokens}, "
578
- f"输出token: {completion_tokens}, "
579
- f"首字用时: {first_token_time:.4f}秒, "
580
- f"总共用时: {total_time:.4f}秒, "
581
- f"使用的模型: {model_name}, "
582
- f"用户的内容: {user_content_replaced}, "
583
- f"输出的内容: {response_content_replaced}"
584
- )
585
 
586
- with data_lock:
587
- request_timestamps.append(time.time())
588
- token_counts.append(prompt_tokens + completion_tokens)
589
 
590
  yield "data: [DONE]\n\n"
591
-
592
  return Response(
593
  stream_with_context(generate()),
594
- content_type="text/event-stream"
 
 
 
 
595
  )
596
 
597
  first_chunk_time = None
 
360
  headers=headers,
361
  json=data,
362
  stream=data.get("stream", False),
363
+ timeout=1200
364
  )
365
 
366
  if response.status_code == 429:
 
465
  content_type=response.headers['Content-Type']
466
  )
467
 
468
+ if model_name == "deepseek-reasoner-openwebui":
469
  first_chunk_time = None
470
  full_response_content = ""
471
  reasoning_content_accumulated = ""
472
  content_accumulated = ""
473
  first_reasoning_chunk = True
474
+ response_id = f"chatcmpl-{uuid.uuid4()}"
475
+ created_time = int(time.time())
476
  for chunk in response.iter_lines():
477
  if chunk:
478
  if first_chunk_time is None:
479
  first_chunk_time = time.time()
480
+ chunk_str = chunk.decode("utf-8")
481
+ full_response_content += chunk_str
482
 
483
+ if chunk_str.startswith("data:"):
484
+ try:
485
+ chunk_json = json.loads(chunk_str[5:].strip())
486
+ delta = chunk_json.get("choices", [{}])[0].get("delta", {})
487
+
488
+ openai_chunk = {
489
+ "id": response_id,
490
+ "object": "chat.completion.chunk",
491
+ "created": created_time,
492
+ "model": model_name,
493
+ "choices": [{
494
+ "index": 0,
495
+ "delta": {},
496
+ "finish_reason": None
497
+ }]
498
+ }
499
+
500
+ if "reasoning_content" in delta:
501
+ if first_reasoning_chunk:
502
+ openai_chunk["choices"][0]["delta"]["content"] = "<think>\n"
503
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
504
+ first_reasoning_chunk = False
505
+
506
+ openai_chunk["choices"][0]["delta"]["content"] = delta["reasoning_content"]
507
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
508
+ reasoning_content_accumulated += delta["reasoning_content"]
509
+
510
+ if "content" in delta:
511
+ if not first_reasoning_chunk:
512
+ openai_chunk["choices"][0]["delta"]["content"] = "\n</think>\n"
513
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
514
+ first_reasoning_chunk = True
515
+
516
+ openai_chunk["choices"][0]["delta"]["content"] = delta["content"]
517
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
518
+ content_accumulated += delta["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
 
520
+ except (json.JSONDecodeError, KeyError) as e:
521
+ logging.error(f"Error parsing chunk: {e}")
 
522
 
523
  yield "data: [DONE]\n\n"
524
+
525
  return Response(
526
  stream_with_context(generate()),
527
+ mimetype="text/event-stream",
528
+ headers={
529
+ "X-Content-Type-Options": "nosniff",
530
+ "Connection": "keep-alive"
531
+ }
532
  )
533
 
534
  first_chunk_time = None