Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -463,6 +463,7 @@ def handsome_chat_completions(): | |
| 463 | 
             
                        def generate():
         | 
| 464 | 
             
                            first_chunk_time = None
         | 
| 465 | 
             
                            full_response_content = ""
         | 
|  | |
| 466 | 
             
                            for chunk in response.iter_content(chunk_size=1024):
         | 
| 467 | 
             
                                if chunk:
         | 
| 468 | 
             
                                    if first_chunk_time is None:
         | 
| @@ -496,14 +497,20 @@ def handsome_chat_completions(): | |
| 496 | 
             
                                                "usage"
         | 
| 497 | 
             
                                            ]["completion_tokens"]
         | 
| 498 |  | 
| 499 | 
            -
                                        #  | 
| 500 | 
             
                                        if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
         | 
| 501 | 
             
                                            delta = response_json["choices"][0].get("delta", {})
         | 
| 502 | 
             
                                            if "reasoning_content" in delta and delta["reasoning_content"]:
         | 
| 503 | 
            -
                                                 | 
| 504 | 
            -
                                                formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
         | 
| 505 | 
            -
                                                response_content += formatted_reasoning + "\n"
         | 
| 506 | 
             
                                            if "content" in delta and delta["content"]:
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 507 | 
             
                                                response_content += delta["content"]
         | 
| 508 | 
             
                                        elif "choices" in response_json and len(response_json["choices"]) > 0:
         | 
| 509 | 
             
                                            delta = response_json["choices"][0].get("delta", {})
         | 
| @@ -527,6 +534,13 @@ def handsome_chat_completions(): | |
| 527 | 
             
                                            f"解析流式响应单行 JSON 失败: {e}, "
         | 
| 528 | 
             
                                            f"行内容: {line}"
         | 
| 529 | 
             
                                        )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 530 |  | 
| 531 | 
             
                            user_content = ""
         | 
| 532 | 
             
                            messages = data.get("messages", [])
         | 
| @@ -574,6 +588,7 @@ def handsome_chat_completions(): | |
| 574 | 
             
                            content_type=response.headers['Content-Type']
         | 
| 575 | 
             
                        )
         | 
| 576 | 
             
                    else:
         | 
|  | |
| 577 | 
             
                        response.raise_for_status()
         | 
| 578 | 
             
                        end_time = time.time()
         | 
| 579 | 
             
                        response_json = response.json()
         | 
|  | |
| 463 | 
             
                        def generate():
         | 
| 464 | 
             
                            first_chunk_time = None
         | 
| 465 | 
             
                            full_response_content = ""
         | 
| 466 | 
            +
                            pending_reasoning_lines = []  # Store incomplete reasoning lines
         | 
| 467 | 
             
                            for chunk in response.iter_content(chunk_size=1024):
         | 
| 468 | 
             
                                if chunk:
         | 
| 469 | 
             
                                    if first_chunk_time is None:
         | 
|  | |
| 497 | 
             
                                                "usage"
         | 
| 498 | 
             
                                            ]["completion_tokens"]
         | 
| 499 |  | 
| 500 | 
            +
                                        # Improved special handling for deepseek-reasoner in streaming mode
         | 
| 501 | 
             
                                        if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
         | 
| 502 | 
             
                                            delta = response_json["choices"][0].get("delta", {})
         | 
| 503 | 
             
                                            if "reasoning_content" in delta and delta["reasoning_content"]:
         | 
| 504 | 
            +
                                                pending_reasoning_lines.extend(delta["reasoning_content"].splitlines(keepends=True))
         | 
|  | |
|  | |
| 505 | 
             
                                            if "content" in delta and delta["content"]:
         | 
| 506 | 
            +
                                                # Process any pending reasoning lines before the content
         | 
| 507 | 
            +
                                                if pending_reasoning_lines:
         | 
| 508 | 
            +
                                                    for reasoning_line in pending_reasoning_lines:
         | 
| 509 | 
            +
                                                        if reasoning_line.endswith("\n"):
         | 
| 510 | 
            +
                                                            response_content += f"> {reasoning_line}"
         | 
| 511 | 
            +
                                                        else:
         | 
| 512 | 
            +
                                                            response_content += f"> {reasoning_line}\n"
         | 
| 513 | 
            +
                                                    pending_reasoning_lines = []  # Clear pending lines
         | 
| 514 | 
             
                                                response_content += delta["content"]
         | 
| 515 | 
             
                                        elif "choices" in response_json and len(response_json["choices"]) > 0:
         | 
| 516 | 
             
                                            delta = response_json["choices"][0].get("delta", {})
         | 
|  | |
| 534 | 
             
                                            f"解析流式响应单行 JSON 失败: {e}, "
         | 
| 535 | 
             
                                            f"行内容: {line}"
         | 
| 536 | 
             
                                        )
         | 
| 537 | 
            +
                            
         | 
| 538 | 
            +
                            # Process any remaining reasoning lines after all chunks are received
         | 
| 539 | 
            +
                            if pending_reasoning_lines:
         | 
| 540 | 
            +
                                for reasoning_line in pending_reasoning_lines:
         | 
| 541 | 
            +
                                    response_content += f"> {reasoning_line}"
         | 
| 542 | 
            +
                                if not response_content.endswith("\n"):
         | 
| 543 | 
            +
                                    response_content += "\n"
         | 
| 544 |  | 
| 545 | 
             
                            user_content = ""
         | 
| 546 | 
             
                            messages = data.get("messages", [])
         | 
|  | |
| 588 | 
             
                            content_type=response.headers['Content-Type']
         | 
| 589 | 
             
                        )
         | 
| 590 | 
             
                    else:
         | 
| 591 | 
            +
                        # ... (rest of the code for non-streaming mode remains the same)
         | 
| 592 | 
             
                        response.raise_for_status()
         | 
| 593 | 
             
                        end_time = time.time()
         | 
| 594 | 
             
                        response_json = response.json()
         | 
