Update app.py
Browse files
app.py
CHANGED
@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
|
|
53 |
if not data.get("is_available", False):
|
54 |
logging.warning(f"API Key: {api_key} is not available.")
|
55 |
return None
|
56 |
-
|
57 |
balance_infos = data.get("balance_infos", [])
|
58 |
total_balance_cny = 0.0
|
59 |
usd_balance = 0.0
|
@@ -270,12 +270,12 @@ def check_tokens():
|
|
270 |
)
|
271 |
|
272 |
return jsonify(results)
|
273 |
-
|
274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
275 |
def list_models():
|
276 |
if not check_authorization(request):
|
277 |
return jsonify({"error": "Unauthorized"}), 401
|
278 |
-
|
279 |
detailed_models = [
|
280 |
{
|
281 |
"id": "deepseek-chat",
|
@@ -379,6 +379,38 @@ def billing_usage():
|
|
379 |
"total_usage": 0
|
380 |
})
|
381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
383 |
def handsome_chat_completions():
|
384 |
if not check_authorization(request):
|
@@ -389,7 +421,7 @@ def handsome_chat_completions():
|
|
389 |
return jsonify({"error": "Invalid request data"}), 400
|
390 |
|
391 |
model_name = data['model']
|
392 |
-
|
393 |
api_key = select_key(model_name)
|
394 |
|
395 |
if not api_key:
|
@@ -402,8 +434,7 @@ def handsome_chat_completions():
|
|
402 |
)
|
403 |
}
|
404 |
), 429
|
405 |
-
|
406 |
-
# Special handling for deepseek-reasoner
|
407 |
if model_name == "deepseek-reasoner":
|
408 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
409 |
if param in data:
|
@@ -413,7 +444,7 @@ def handsome_chat_completions():
|
|
413 |
"Authorization": f"Bearer {api_key}",
|
414 |
"Content-Type": "application/json"
|
415 |
}
|
416 |
-
|
417 |
try:
|
418 |
start_time = time.time()
|
419 |
response = requests.post(
|
@@ -431,42 +462,12 @@ def handsome_chat_completions():
|
|
431 |
def generate():
|
432 |
first_chunk_time = None
|
433 |
full_response_content = ""
|
434 |
-
reasoning_content_accumulated = "" # Accumulate reasoning content
|
435 |
-
content_accumulated = "" # Accumulate regular content
|
436 |
-
first_reasoning_chunk = True # Flag to track the first reasoning chunk
|
437 |
-
|
438 |
for chunk in response.iter_content(chunk_size=1024):
|
439 |
if chunk:
|
440 |
if first_chunk_time is None:
|
441 |
first_chunk_time = time.time()
|
442 |
full_response_content += chunk.decode("utf-8")
|
443 |
-
|
444 |
-
for line in chunk.decode("utf-8").splitlines():
|
445 |
-
if line.startswith("data:"):
|
446 |
-
try:
|
447 |
-
chunk_json = json.loads(line.lstrip("data: ").strip())
|
448 |
-
if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
|
449 |
-
delta = chunk_json["choices"][0].get("delta", {})
|
450 |
-
|
451 |
-
if delta.get("reasoning_content") is not None:
|
452 |
-
if first_reasoning_chunk:
|
453 |
-
reasoning_content_accumulated += "```Thinking\n"
|
454 |
-
first_reasoning_chunk = False
|
455 |
-
reasoning_content_accumulated += delta.get("reasoning_content", "")
|
456 |
-
|
457 |
-
if delta.get("content") is not None:
|
458 |
-
if not first_reasoning_chunk:
|
459 |
-
reasoning_content_accumulated += "\n```"
|
460 |
-
first_reasoning_chunk = True
|
461 |
-
content_accumulated += delta.get("content", "")
|
462 |
-
yield f"data: {json.dumps({'choices': [{'delta': {'content': (reasoning_content_accumulated if reasoning_content_accumulated else '') + content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
|
463 |
-
reasoning_content_accumulated = ""
|
464 |
-
content_accumulated = ""
|
465 |
-
|
466 |
-
except (KeyError, ValueError, json.JSONDecodeError) as e:
|
467 |
-
logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
|
468 |
-
continue
|
469 |
-
|
470 |
|
471 |
end_time = time.time()
|
472 |
first_token_time = (
|
@@ -477,6 +478,7 @@ def handsome_chat_completions():
|
|
477 |
|
478 |
prompt_tokens = 0
|
479 |
completion_tokens = 0
|
|
|
480 |
for line in full_response_content.splitlines():
|
481 |
if line.startswith("data:"):
|
482 |
line = line[5:].strip()
|
@@ -489,9 +491,22 @@ def handsome_chat_completions():
|
|
489 |
"usage" in response_json and
|
490 |
"completion_tokens" in response_json["usage"]
|
491 |
):
|
492 |
-
completion_tokens
|
493 |
"usage"
|
494 |
]["completion_tokens"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
if (
|
496 |
"usage" in response_json and
|
497 |
"prompt_tokens" in response_json["usage"]
|
@@ -532,8 +547,7 @@ def handsome_chat_completions():
|
|
532 |
user_content_replaced = user_content.replace(
|
533 |
'\n', '\\n'
|
534 |
).replace('\r', '\\n')
|
535 |
-
response_content_replaced = (
|
536 |
-
response_content_replaced = response_content_replaced.replace(
|
537 |
'\n', '\\n'
|
538 |
).replace('\r', '\\n')
|
539 |
|
@@ -550,16 +564,13 @@ def handsome_chat_completions():
|
|
550 |
|
551 |
with data_lock:
|
552 |
request_timestamps.append(time.time())
|
553 |
-
token_counts.append(prompt_tokens
|
554 |
-
|
555 |
-
yield "data: [DONE]\n\n"
|
556 |
|
557 |
return Response(
|
558 |
stream_with_context(generate()),
|
559 |
-
content_type=
|
560 |
)
|
561 |
else:
|
562 |
-
# ... (Non-streaming part remains the same as in the previous response)
|
563 |
response.raise_for_status()
|
564 |
end_time = time.time()
|
565 |
response_json = response.json()
|
@@ -567,21 +578,12 @@ def handsome_chat_completions():
|
|
567 |
|
568 |
try:
|
569 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
570 |
-
completion_tokens = response_json[
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
if "message" in choice:
|
577 |
-
if "reasoning_content" in choice["message"]:
|
578 |
-
formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
|
579 |
-
response_content += formatted_reasoning + "\n"
|
580 |
-
if "content" in choice["message"]:
|
581 |
-
response_content += choice["message"]["content"]
|
582 |
-
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
583 |
-
response_content = response_json["choices"][0]["message"]["content"]
|
584 |
-
|
585 |
except (KeyError, ValueError, IndexError) as e:
|
586 |
logging.error(
|
587 |
f"解析非流式响应 JSON 失败: {e}, "
|
@@ -604,8 +606,7 @@ def handsome_chat_completions():
|
|
604 |
item.get("type") == "text"
|
605 |
):
|
606 |
user_content += (
|
607 |
-
item.get("text", "") +
|
608 |
-
" "
|
609 |
)
|
610 |
|
611 |
user_content = user_content.strip()
|
@@ -629,32 +630,12 @@ def handsome_chat_completions():
|
|
629 |
)
|
630 |
with data_lock:
|
631 |
request_timestamps.append(time.time())
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
"id": response_json.get("id", ""),
|
637 |
-
"object": "chat.completion",
|
638 |
-
"created": response_json.get("created", int(time.time())),
|
639 |
-
"model": model_name,
|
640 |
-
"choices": [
|
641 |
-
{
|
642 |
-
"index": 0,
|
643 |
-
"message": {
|
644 |
-
"role": "assistant",
|
645 |
-
"content": response_content
|
646 |
-
},
|
647 |
-
"finish_reason": "stop"
|
648 |
-
}
|
649 |
-
],
|
650 |
-
"usage": {
|
651 |
-
"prompt_tokens": prompt_tokens,
|
652 |
-
"completion_tokens": completion_tokens,
|
653 |
-
"total_tokens": prompt_tokens + completion_tokens
|
654 |
-
}
|
655 |
-
}
|
656 |
|
657 |
-
return jsonify(
|
658 |
|
659 |
except requests.exceptions.RequestException as e:
|
660 |
logging.error(f"请求转发异常: {e}")
|
@@ -680,4 +661,4 @@ if __name__ == '__main__':
|
|
680 |
debug=False,
|
681 |
host='0.0.0.0',
|
682 |
port=int(os.environ.get('PORT', 7860))
|
683 |
-
)
|
|
|
53 |
if not data.get("is_available", False):
|
54 |
logging.warning(f"API Key: {api_key} is not available.")
|
55 |
return None
|
56 |
+
|
57 |
balance_infos = data.get("balance_infos", [])
|
58 |
total_balance_cny = 0.0
|
59 |
usd_balance = 0.0
|
|
|
270 |
)
|
271 |
|
272 |
return jsonify(results)
|
273 |
+
|
274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
275 |
def list_models():
|
276 |
if not check_authorization(request):
|
277 |
return jsonify({"error": "Unauthorized"}), 401
|
278 |
+
|
279 |
detailed_models = [
|
280 |
{
|
281 |
"id": "deepseek-chat",
|
|
|
379 |
"total_usage": 0
|
380 |
})
|
381 |
|
382 |
+
@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
def billing_subscription():
    """Serve the billing subscription summary as JSON.

    Returns:
        A ``({"error": "Unauthorized"}, 401)`` response when
        ``check_authorization`` rejects the request. Otherwise a JSON
        object with ``"object": "billing_subscription"`` in which every
        hard-limit field carries the value returned by
        ``get_billing_info()`` and every soft-limit field is 0.
    """
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    # Imported locally so this block does not depend on the file-level
    # import list containing ``timezone`` as well as ``datetime``.
    from datetime import timezone

    total_balance = get_billing_info()

    # "Never expires" sentinel. An aware UTC datetime is converted to a
    # timestamp arithmetically; a naive one is routed through the platform
    # mktime(), which may raise OverflowError/OSError for a date this far
    # in the future and makes the value depend on the server's local zone.
    access_until = int(
        datetime(9999, 12, 31, tzinfo=timezone.utc).timestamp()
    )

    return jsonify({
        "object": "billing_subscription",
        "has_payment_method": False,
        "canceled": False,
        "canceled_at": None,
        "delinquent": None,
        "access_until": access_until,
        "soft_limit": 0,
        "hard_limit": total_balance,
        "system_hard_limit": total_balance,
        "soft_limit_usd": 0,
        "hard_limit_usd": total_balance,
        "system_hard_limit_usd": total_balance,
        "plan": {
            "name": "SiliconFlow API",
            "id": "siliconflow-api"
        },
        "account_name": "SiliconFlow User",
        "po_number": None,
        "billing_email": None,
        "tax_ids": [],
        "billing_address": None,
        "business_address": None
    })
|
413 |
+
|
414 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
415 |
def handsome_chat_completions():
|
416 |
if not check_authorization(request):
|
|
|
421 |
return jsonify({"error": "Invalid request data"}), 400
|
422 |
|
423 |
model_name = data['model']
|
424 |
+
|
425 |
api_key = select_key(model_name)
|
426 |
|
427 |
if not api_key:
|
|
|
434 |
)
|
435 |
}
|
436 |
), 429
|
437 |
+
|
|
|
438 |
if model_name == "deepseek-reasoner":
|
439 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
440 |
if param in data:
|
|
|
444 |
"Authorization": f"Bearer {api_key}",
|
445 |
"Content-Type": "application/json"
|
446 |
}
|
447 |
+
|
448 |
try:
|
449 |
start_time = time.time()
|
450 |
response = requests.post(
|
|
|
462 |
def generate():
|
463 |
first_chunk_time = None
|
464 |
full_response_content = ""
|
|
|
|
|
|
|
|
|
465 |
for chunk in response.iter_content(chunk_size=1024):
|
466 |
if chunk:
|
467 |
if first_chunk_time is None:
|
468 |
first_chunk_time = time.time()
|
469 |
full_response_content += chunk.decode("utf-8")
|
470 |
+
yield chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
|
472 |
end_time = time.time()
|
473 |
first_token_time = (
|
|
|
478 |
|
479 |
prompt_tokens = 0
|
480 |
completion_tokens = 0
|
481 |
+
response_content = ""
|
482 |
for line in full_response_content.splitlines():
|
483 |
if line.startswith("data:"):
|
484 |
line = line[5:].strip()
|
|
|
491 |
"usage" in response_json and
|
492 |
"completion_tokens" in response_json["usage"]
|
493 |
):
|
494 |
+
completion_tokens = response_json[
|
495 |
"usage"
|
496 |
]["completion_tokens"]
|
497 |
+
|
498 |
+
if (
|
499 |
+
"choices" in response_json and
|
500 |
+
len(response_json["choices"]) > 0 and
|
501 |
+
"delta" in response_json["choices"][0] and
|
502 |
+
"content" in response_json[
|
503 |
+
"choices"
|
504 |
+
][0]["delta"]
|
505 |
+
):
|
506 |
+
response_content += response_json[
|
507 |
+
"choices"
|
508 |
+
][0]["delta"]["content"]
|
509 |
+
|
510 |
if (
|
511 |
"usage" in response_json and
|
512 |
"prompt_tokens" in response_json["usage"]
|
|
|
547 |
user_content_replaced = user_content.replace(
|
548 |
'\n', '\\n'
|
549 |
).replace('\r', '\\n')
|
550 |
+
response_content_replaced = response_content.replace(
|
|
|
551 |
'\n', '\\n'
|
552 |
).replace('\r', '\\n')
|
553 |
|
|
|
564 |
|
565 |
with data_lock:
|
566 |
request_timestamps.append(time.time())
|
567 |
+
token_counts.append(prompt_tokens+completion_tokens)
|
|
|
|
|
568 |
|
569 |
return Response(
|
570 |
stream_with_context(generate()),
|
571 |
+
content_type=response.headers['Content-Type']
|
572 |
)
|
573 |
else:
|
|
|
574 |
response.raise_for_status()
|
575 |
end_time = time.time()
|
576 |
response_json = response.json()
|
|
|
578 |
|
579 |
try:
|
580 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
581 |
+
completion_tokens = response_json[
|
582 |
+
"usage"
|
583 |
+
]["completion_tokens"]
|
584 |
+
response_content = response_json[
|
585 |
+
"choices"
|
586 |
+
][0]["message"]["content"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
587 |
except (KeyError, ValueError, IndexError) as e:
|
588 |
logging.error(
|
589 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
|
606 |
item.get("type") == "text"
|
607 |
):
|
608 |
user_content += (
|
609 |
+
item.get("text", "") + " "
|
|
|
610 |
)
|
611 |
|
612 |
user_content = user_content.strip()
|
|
|
630 |
)
|
631 |
with data_lock:
|
632 |
request_timestamps.append(time.time())
|
633 |
+
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
|
634 |
+
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
|
635 |
+
else:
|
636 |
+
token_counts.append(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
637 |
|
638 |
+
return jsonify(response_json)
|
639 |
|
640 |
except requests.exceptions.RequestException as e:
|
641 |
logging.error(f"请求转发异常: {e}")
|
|
|
661 |
debug=False,
|
662 |
host='0.0.0.0',
|
663 |
port=int(os.environ.get('PORT', 7860))
|
664 |
+
)
|