maxiaolong03 committed
Commit a93c636 · 1 Parent(s): 47fd9da
Files changed (2)
  1. app.py +122 -169
  2. bot_requests.py +56 -71
app.py CHANGED
@@ -15,23 +15,22 @@
 """This file contains the code for the chatbot demo using Gradio."""
 
 import argparse
-from collections import namedtuple
-from functools import partial
+import base64
 import json
 import logging
 import os
-import base64
 from argparse import ArgumentParser
+from collections import namedtuple
+from functools import partial
 
 import gradio as gr
-
 from bot_requests import BotClient
 
 os.environ["NO_PROXY"] = "localhost,127.0.0.1" # Disable proxy
 
 logging.root.setLevel(logging.INFO)
 
-MULTI_MODEL_PREFIX = "ernie-4.5-turbo-vl"
+MULTI_MODEL_PREFIX = "ERNIE-4.5-VL"
 
 
 def get_args() -> argparse.Namespace:
@@ -48,21 +47,13 @@ def get_args() -> argparse.Namespace:
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
 
+    parser.add_argument("--server-port", type=int, default=7860, help="Demo server port.")
+    parser.add_argument("--server-name", type=str, default="0.0.0.0", help="Demo server name.")
+    parser.add_argument("--max_char", type=int, default=8000, help="Maximum character limit for messages.")
+    parser.add_argument("--max_retry_num", type=int, default=3, help="Maximum retry number for request.")
     parser.add_argument(
-        "--server-port", type=int, default=7860, help="Demo server port."
-    )
-    parser.add_argument(
-        "--server-name", type=str, default="0.0.0.0", help="Demo server name."
-    )
-    parser.add_argument(
-        "--max_char", type=int, default=8000, help="Maximum character limit for messages."
-    )
-    parser.add_argument(
-        "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
-    )
-    parser.add_argument(
-        "--model_map",
-        type=str,
+        "--model_map",
+        type=str,
         default="""{
             "ernie-4.5-turbo-128k-preview": "https://qianfan.baidubce.com/v2",
             "ernie-4.5-21b-a3b": "https://qianfan.baidubce.com/v2",
@@ -80,7 +71,7 @@ def get_args() -> argparse.Namespace:
         - Prefix determines model capabilities:
         * ERNIE-4.5[-*]: Text-only model
         * ERNIE-4.5-VL[-*]: Multimodal models (image+text)
-    """
+    """,
     )
 
     args = parser.parse_args()
@@ -96,7 +87,7 @@ def get_args() -> argparse.Namespace:
     return args
 
 
-class GradioEvents(object):
+class GradioEvents:
     """
     Central handler for all Gradio interface events in the chatbot demo. Provides static methods
     for processing user interactions including:
@@ -104,16 +95,17 @@ class GradioEvents(object):
     - Conversation state management
     - Image handling and URL conversion
     - Component visibility control
-
-    Coordinates with BotClient to interface with backend models while maintaining
+
+    Coordinates with BotClient to interface with backend models while maintaining
     conversation history and handling multimodal inputs.
     """
+
     @staticmethod
     def get_image_url(image_path: str) -> str:
         """
-        Converts an image file at the given path to a base64 encoded data URL
-        that can be used directly in HTML or Gradio interfaces.
-        Reads the image file, encodes it in base64 format, and constructs
+        Converts an image file at the given path to a base64 encoded data URL
+        that can be used directly in HTML or Gradio interfaces.
+        Reads the image file, encodes it in base64 format, and constructs
         a data URL with the appropriate image MIME type.
 
         Args:
@@ -126,26 +118,26 @@ class GradioEvents(object):
         extension = image_path.split(".")[-1]
         with open(image_path, "rb") as image_file:
             base64_image = base64.b64encode(image_file.read()).decode("utf-8")
-        url = "data:image/{ext};base64,{img}".format(ext=extension, img=base64_image)
+        url = f"data:image/{extension};base64,{base64_image}"
         return url
 
     @staticmethod
     def chat_stream(
-        query: str,
-        task_history: list,
-        image_history: dict,
-        model_name: str,
-        file_url: str,
-        system_msg: str,
-        max_tokens: int,
-        temperature: float,
-        top_p: float,
-        bot_client: BotClient
+        query: str,
+        task_history: list,
+        image_history: dict,
+        model_name: str,
+        file_url: str,
+        system_msg: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        bot_client: BotClient,
     ) -> str:
         """
-        Handles streaming chat interactions by processing user queries and
-        generating real-time responses from the bot client. Constructs conversation
-        history including system messages, text inputs and image attachments, then
+        Handles streaming chat interactions by processing user queries and
+        generating real-time responses from the bot client. Constructs conversation
+        history including system messages, text inputs and image attachments, then
         streams back model responses.
 
         Args:
@@ -169,10 +161,9 @@ class GradioEvents(object):
         for idx, (query_h, response_h) in enumerate(task_history):
             if idx in image_history:
                 content = []
-                content.append({
-                    "type": "image_url",
-                    "image_url": {"url": GradioEvents.get_image_url(image_history[idx])}
-                })
+                content.append(
+                    {"type": "image_url", "image_url": {"url": GradioEvents.get_image_url(image_history[idx])}}
+                )
                 content.append({"type": "text", "text": query_h})
                 conversation.append({"role": "user", "content": content})
             else:
@@ -193,29 +184,29 @@ class GradioEvents(object):
             for chunk in bot_client.process_stream(model_name, req_data, max_tokens, temperature, top_p):
                 if "error" in chunk:
                     raise Exception(chunk["error"])
-
+
                 message = chunk.get("choices", [{}])[0].get("delta", {})
                 content = message.get("content", "")
-
+
                 if content:
                     yield content
-
+
         except Exception as e:
             raise gr.Error("Exception: " + repr(e))
 
     @staticmethod
     def predict_stream(
-        query: str,
-        chatbot: list,
-        task_history: list,
-        image_history: dict,
-        model: str,
-        file_url: str,
-        system_msg: str,
-        max_tokens: int,
-        temperature: float,
-        top_p: float,
-        bot_client: BotClient
+        query: str,
+        chatbot: list,
+        task_history: list,
+        image_history: dict,
+        model: str,
+        file_url: str,
+        system_msg: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        bot_client: BotClient,
     ) -> list:
         """
         Processes user queries in a streaming manner by coordinating with the chat stream handler,
@@ -240,29 +231,20 @@ class GradioEvents(object):
             list: A list containing the updated chatbot state after processing the user's query.
         """
 
-        logging.info("User: {}".format(query))
-        chatbot.append({"role": "user", "content": query})
-
+        logging.info(f"User: {query}")
+        chatbot.append({"role": "user", "content": query})
+
        # First yield the chatbot with user message
        yield chatbot
 
        new_texts = GradioEvents.chat_stream(
-            query,
-            task_history,
-            image_history,
-            model,
-            file_url,
-            system_msg,
-            max_tokens,
-            temperature,
-            top_p,
-            bot_client
+            query, task_history, image_history, model, file_url, system_msg, max_tokens, temperature, top_p, bot_client
        )
 
        response = ""
-        for new_text in new_texts:
+        for new_text in new_texts:
            response += new_text
-
+
            # Remove previous message if exists
            if chatbot[-1].get("role") == "assistant":
                chatbot.pop(-1)
@@ -271,26 +253,26 @@ class GradioEvents(object):
            chatbot.append({"role": "assistant", "content": response})
            yield chatbot
 
-        logging.info("History: {}".format(task_history))
-        task_history.append((query, response))
-        logging.info("ERNIE models: {}".format(response))
+        logging.info(f"History: {task_history}")
+        task_history.append((query, response))
+        logging.info(f"ERNIE models: {response}")
 
    @staticmethod
    def regenerate(
-        chatbot: list,
-        task_history: list,
-        image_history: dict,
-        model: str,
-        file_url: str,
-        system_msg: str,
-        max_tokens: int,
-        temperature: float,
-        top_p: float,
-        bot_client: BotClient
+        chatbot: list,
+        task_history: list,
+        image_history: dict,
+        model: str,
+        file_url: str,
+        system_msg: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        bot_client: BotClient,
    ) -> list:
        """
-        Reconstructs the conversation context by removing the last interaction and
-        reprocesses the user's previous query to generate a fresh response. Maintains
+        Reconstructs the conversation context by removing the last interaction and
+        reprocesses the user's previous query to generate a fresh response. Maintains
        consistency in conversation flow while allowing response regeneration.
 
        Args:
@@ -319,26 +301,25 @@ class GradioEvents(object):
        chatbot.pop(-1)
        chatbot.pop(-1)
 
-        for chunk in GradioEvents.predict_stream(
-            item[0],
-            chatbot,
-            task_history,
+        yield from GradioEvents.predict_stream(
+            item[0],
+            chatbot,
+            task_history,
            image_history,
-            model,
+            model,
            file_url,
-            system_msg,
-            max_tokens,
-            temperature,
+            system_msg,
+            max_tokens,
+            temperature,
            top_p,
-            bot_client
-        ):
-            yield chunk
+            bot_client,
+        )
 
    @staticmethod
    def reset_user_input() -> gr.update:
        """
        Reset user input field value to empty string.
-
+
        Returns:
            gr.update: Update object representing the new value of the user input field.
        """
@@ -348,7 +329,7 @@ class GradioEvents(object):
    def reset_state() -> tuple:
        """
        Reset all states including chatbot, task_history, image_history, and file_btn.
-
+
        Returns:
            tuple: A tuple containing the following values:
            - chatbot (list): An empty list that represents the cleared chatbot state.
@@ -357,19 +338,15 @@ class GradioEvents(object):
            - file_btn (gr.update): An update object that sets the value of the file button to None.
        """
        GradioEvents.gc()
-
-        reset_result = namedtuple("reset_result",
-            ["chatbot",
-            "task_history",
-            "image_history",
-            "file_btn"])
+
+        reset_result = namedtuple("reset_result", ["chatbot", "task_history", "image_history", "file_btn"])
        return reset_result(
            [], # clear chatbot
            [], # clear task_history
            {}, # clear image_history
            gr.update(value=None), # clear file_btn
        )
-
+
    @staticmethod
    def gc():
        """Run garbage collection to free up memory resources."""
@@ -381,10 +358,10 @@ class GradioEvents(object):
    def toggle_components_visibility(model_name: str) -> gr.update:
        """
        Toggle visibility of components depending on the selected model name.
-
+
        Args:
            model_name (str): Name of the selected model.
-
+
        Returns:
            gr.update: An update object representing the visibility of the file button.
        """
@@ -394,7 +371,7 @@
 def launch_demo(args: argparse.Namespace, bot_client: BotClient):
    """
    Launch demo program
-
+
    Args:
        args (argparse.Namespace): argparse Namespace object containing parsed command line arguments
        bot_client (BotClient): Bot client instance
@@ -420,34 +397,29 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
    """
    with gr.Blocks(css=css) as demo:
        logo_url = GradioEvents.get_image_url("assets/logo.png")
-        gr.Markdown("""\
-<p align="center"><img src="{}" \
-style="height: 60px"/><p>""".format(logo_url))
+        gr.Markdown(
+            f"""\
+<p align="center"><img src="{logo_url}" \
+style="height: 60px"/><p>"""
+        )
        gr.Markdown(
            """\
 <center><font size=3>This demo is based on ERNIE models. \
 (本演示基于文心大模型实现。)</center>"""
        )
 
-        chatbot = gr.Chatbot(
-            label="ERNIE",
-            elem_classes="control-height",
-            type="messages"
-        )
+        chatbot = gr.Chatbot(label="ERNIE", elem_classes="control-height", type="messages")
        model_names = list(args.model_map.keys())
        with gr.Row():
            model_name = gr.Dropdown(
-                label="Select Model",
-                choices=model_names,
-                value=model_names[0],
-                allow_custom_value=True
+                label="Select Model", choices=model_names, value=model_names[0], allow_custom_value=True
            )
            file_btn = gr.File(
-                label="Image upload (Active only for multimodal models. Accepted formats: PNG, JPEG, JPG)",
-                height="80px",
-                visible=True,
+                label="Image upload (Active only for multimodal models. Accepted formats: PNG, JPEG, JPG)",
+                height="80px",
+                visible=True,
                file_types=[".png", ".jpeg", "jpg"],
-                elem_id="file-upload"
+                elem_id="file-upload",
            )
        query = gr.Textbox(label="Input", elem_id="text_input")
 
@@ -462,66 +434,46 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
            system_message,
            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Temperature"),
-            gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Top-p (nucleus sampling)")
+            gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Top-p (nucleus sampling)"),
        ]
-
+
        task_history = gr.State([])
        image_history = gr.State({})
-
-        model_name.change(
-            GradioEvents.toggle_components_visibility,
-            inputs=model_name,
-            outputs=file_btn
-        )
+
+        model_name.change(GradioEvents.toggle_components_visibility, inputs=model_name, outputs=file_btn)
        model_name.change(
-            GradioEvents.reset_state,
-            outputs=[chatbot, task_history, image_history, file_btn],
-            show_progress=True
-        )
-        predict_with_clients = partial(
-            GradioEvents.predict_stream,
-            bot_client=bot_client
-        )
-        regenerate_with_clients = partial(
-            GradioEvents.regenerate,
-            bot_client=bot_client
-        )
+            GradioEvents.reset_state, outputs=[chatbot, task_history, image_history, file_btn], show_progress=True
+        )
+        predict_with_clients = partial(GradioEvents.predict_stream, bot_client=bot_client)
+        regenerate_with_clients = partial(GradioEvents.regenerate, bot_client=bot_client)
        query.submit(
-            predict_with_clients,
-            inputs=[query, chatbot, task_history, image_history, model_name, file_btn] + additional_inputs,
-            outputs=[chatbot],
-            show_progress=True
+            predict_with_clients,
+            inputs=[query, chatbot, task_history, image_history, model_name, file_btn] + additional_inputs,
+            outputs=[chatbot],
+            show_progress=True,
        )
        query.submit(GradioEvents.reset_user_input, [], [query])
        submit_btn.click(
-            predict_with_clients,
-            inputs=[query, chatbot, task_history, image_history, model_name, file_btn] + additional_inputs,
-            outputs=[chatbot],
+            predict_with_clients,
+            inputs=[query, chatbot, task_history, image_history, model_name, file_btn] + additional_inputs,
+            outputs=[chatbot],
            show_progress=True,
        )
        submit_btn.click(GradioEvents.reset_user_input, [], [query])
        empty_btn.click(
-            GradioEvents.reset_state,
-            outputs=[chatbot, task_history, image_history, file_btn],
-            show_progress=True
+            GradioEvents.reset_state, outputs=[chatbot, task_history, image_history, file_btn], show_progress=True
        )
        regen_btn.click(
-            regenerate_with_clients,
-            inputs=[chatbot, task_history, image_history, model_name, file_btn] + additional_inputs,
-            outputs=[chatbot],
-            show_progress=True
+            regenerate_with_clients,
+            inputs=[chatbot, task_history, image_history, model_name, file_btn] + additional_inputs,
+            outputs=[chatbot],
+            show_progress=True,
        )
 
-        demo.load(
-            GradioEvents.toggle_components_visibility,
-            inputs=gr.State(model_names[0]),
-            outputs=file_btn
-        )
+        demo.load(GradioEvents.toggle_components_visibility, inputs=gr.State(model_names[0]), outputs=file_btn)
+
+        demo.queue().launch(server_port=args.server_port, server_name=args.server_name)
 
-        demo.queue().launch(
-            server_port=args.server_port,
-            server_name=args.server_name
-        )
 
 def main():
    """Main function that runs when this script is executed."""
@@ -529,5 +481,6 @@ def main():
    bot_client = BotClient(args)
    launch_demo(args, bot_client)
 
+
 if __name__ == "__main__":
    main()
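Two patterns in the reformatted app.py are worth noting. `regenerate` now delegates with `yield from` instead of re-yielding chunks in a loop, and the event handlers receive `bot_client` through `functools.partial`, since Gradio only passes component values into handlers. Below is a minimal, self-contained sketch of the `partial` binding pattern; `EchoClient` and `respond` are hypothetical stand-ins for `BotClient` and `predict_stream`, not code from this commit.

```python
from functools import partial

import gradio as gr


class EchoClient:
    """Hypothetical stand-in for BotClient."""

    def reply(self, text: str) -> str:
        return f"echo: {text}"


def respond(query: str, history: list, client: EchoClient) -> tuple:
    # Gradio fills `query` and `history` from components; `client` is not a
    # component, so it must arrive pre-bound via functools.partial.
    history = history + [
        {"role": "user", "content": query},
        {"role": "assistant", "content": client.reply(query)},
    ]
    return history, history


with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    state = gr.State([])
    box = gr.Textbox(label="Input")
    # Bind the non-component dependency once, at wiring time.
    respond_with_client = partial(respond, client=EchoClient())
    box.submit(respond_with_client, inputs=[box, state], outputs=[chat, state])

if __name__ == "__main__":
    demo.launch()
```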
bot_requests.py CHANGED
@@ -14,22 +14,23 @@
 
 """BotClient class for interacting with bot models."""
 
-import os
 import argparse
+import json
 import logging
 import traceback
-import json
+
 import jieba
+import requests
 from openai import OpenAI
 
-import requests
 
-class BotClient(object):
+class BotClient:
    """Client for interacting with various AI models."""
+
    def __init__(self, args: argparse.Namespace):
        """
-        Initializes the BotClient instance by configuring essential parameters from command line arguments
-        including retry limits, character constraints, model endpoints and API credentials while setting up
+        Initializes the BotClient instance by configuring essential parameters from command line arguments
+        including retry limits, character constraints, model endpoints and API credentials while setting up
        default values for missing arguments to ensure robust operation.
 
        Args:
@@ -37,7 +38,7 @@ class BotClient(object):
            Uses getattr() to safely retrieve values with fallback defaults.
        """
        self.logger = logging.getLogger(__name__)
-
+
        self.max_retry_num = getattr(args, 'max_retry_num', 3)
        self.max_char = getattr(args, 'max_char', 8000)
 
@@ -54,8 +55,8 @@ class BotClient(object):
 
    def call_back(self, host_url: str, req_data: dict) -> dict:
        """
-        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
-        conversion to a compatible dictionary format, and manages any exceptions that may occur during
+        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
+        conversion to a compatible dictionary format, and manages any exceptions that may occur during
        the request process while logging errors appropriately.
 
        Args:
@@ -68,20 +69,18 @@ class BotClient(object):
        """
        try:
            client = OpenAI(base_url=host_url, api_key=self.api_key)
-            response = client.chat.completions.create(
-                **req_data
-            )
-
+            response = client.chat.completions.create(**req_data)
+
            # Convert OpenAI response to compatible format
            return response.model_dump()
 
        except Exception as e:
-            self.logger.error("Stream request failed: {}".format(e))
+            self.logger.error(f"Stream request failed: {e}")
            raise
 
    def call_back_stream(self, host_url: str, req_data: dict) -> dict:
        """
-        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
+        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
        in real-time while handling any exceptions that may occur during the streaming process.
 
        Args:
@@ -100,25 +99,20 @@ class BotClient(object):
            for chunk in response:
                if not chunk.choices:
                    continue
-
+
                # Convert OpenAI response to compatible format
                yield chunk.model_dump()
 
        except Exception as e:
-            self.logger.error("Stream request failed: {}".format(e))
+            self.logger.error(f"Stream request failed: {e}")
            raise
 
    def process(
-        self,
-        model_name: str,
-        req_data: dict,
-        max_tokens: int=2048,
-        temperature: float=1.0,
-        top_p: float=0.7
+        self, model_name: str, req_data: dict, max_tokens: int = 2048, temperature: float = 1.0, top_p: float = 0.7
    ) -> dict:
        """
-        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
-        including token limits and sampling settings, truncating messages to fit character limits, making API calls
+        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
+        including token limits and sampling settings, truncating messages to fit character limits, making API calls
        with built-in retry mechanism, and logging the full request/response cycle for debugging purposes.
 
        Args:
@@ -140,7 +134,7 @@ class BotClient(object):
        req_data["messages"] = self.truncate_messages(req_data["messages"])
        for _ in range(self.max_retry_num):
            try:
-                self.logger.info("[MODEL] {}".format(model_url))
+                self.logger.info(f"[MODEL] {model_url}")
                self.logger.info("[req_data]====>")
                self.logger.info(json.dumps(req_data, ensure_ascii=False))
                res = self.call_back(model_url, req_data)
@@ -153,15 +147,11 @@ class BotClient(object):
                res = {}
            if len(res) != 0 and "error" not in res:
                break
-
+
        return res
 
    def process_stream(
-        self, model_name: str,
-        req_data: dict,
-        max_tokens: int=2048,
-        temperature: float=1.0,
-        top_p: float=0.7
+        self, model_name: str, req_data: dict, max_tokens: int = 2048, temperature: float = 1.0, top_p: float = 0.7
    ) -> dict:
        """
        Processes streaming requests by mapping the model name to its endpoint, configuring request parameters,
@@ -184,29 +174,28 @@ class BotClient(object):
        req_data["temperature"] = temperature
        req_data["top_p"] = top_p
        req_data["messages"] = self.truncate_messages(req_data["messages"])
-
+
        last_error = None
        for _ in range(self.max_retry_num):
            try:
-                self.logger.info("[MODEL] {}".format(model_url))
+                self.logger.info(f"[MODEL] {model_url}")
                self.logger.info("[req_data]====>")
                self.logger.info(json.dumps(req_data, ensure_ascii=False))
-
-                for chunk in self.call_back_stream(model_url, req_data):
-                    yield chunk
+
+                yield from self.call_back_stream(model_url, req_data)
                return
-
+
            except Exception as e:
                last_error = e
-                self.logger.error("Stream request failed (attempt {}/{}): {}".format(_ + 1, self.max_retry_num, e))
-
+                self.logger.error(f"Stream request failed (attempt {_ + 1}/{self.max_retry_num}): {e}")
+
        self.logger.error("All retry attempts failed for stream request")
        yield {"error": str(last_error)}
 
    def cut_chinese_english(self, text: str) -> list:
        """
-        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
-        while preserving English words as whole units, with special handling for Unicode character ranges
+        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
+        while preserving English words as whole units, with special handling for Unicode character ranges
        to distinguish between the two languages.
 
        Args:
@@ -239,10 +228,10 @@ class BotClient(object):
        """
        if not messages:
            return messages
-
+
        processed = []
        total_units = 0
-
+
        for msg in messages:
            # Handle two different content formats
            if isinstance(msg["content"], str):
@@ -251,31 +240,33 @@ class BotClient(object):
                text_content = msg["content"][1]["text"]
            else:
                text_content = ""
-
+
            # Calculate unit count after tokenization
            units = self.cut_chinese_english(text_content)
            unit_count = len(units)
-
-            processed.append({
-                "role": msg["role"],
-                "original_content": msg["content"], # Preserve original content
-                "text_content": text_content, # Extracted plain text
-                "units": units,
-                "unit_count": unit_count
-            })
+
+            processed.append(
+                {
+                    "role": msg["role"],
+                    "original_content": msg["content"], # Preserve original content
+                    "text_content": text_content, # Extracted plain text
+                    "units": units,
+                    "unit_count": unit_count,
+                }
+            )
            total_units += unit_count
-
+
        if total_units <= self.max_char:
            return messages
-
+
        # Number of units to remove
        to_remove = total_units - self.max_char
-
+
        # 1. Truncate historical messages
        for i in range(len(processed) - 1, 1):
            if to_remove <= 0:
                break
-
+
            # current = processed[i]
            if processed[i]["unit_count"] <= to_remove:
                processed[i]["text_content"] = ""
@@ -293,7 +284,7 @@ class BotClient(object):
                elif isinstance(processed[i]["original_content"], list):
                    processed[i]["original_content"][1]["text"] = new_text
                to_remove = 0
-
+
        # 2. Truncate system message
        if to_remove > 0:
            system_msg = processed[0]
@@ -313,7 +304,7 @@ class BotClient(object):
            elif isinstance(processed[0]["original_content"], list):
                processed[0]["original_content"][1]["text"] = new_text
            to_remove = 0
-
+
        # 3. Truncate last message
        if to_remove > 0 and len(processed) > 1:
            last_msg = processed[-1]
@@ -331,15 +322,12 @@ class BotClient(object):
                last_msg["original_content"] = ""
            elif isinstance(last_msg["original_content"], list):
                last_msg["original_content"][1]["text"] = ""
-
+
        result = []
        for msg in processed:
            if msg["text_content"]:
-                result.append({
-                    "role": msg["role"],
-                    "content": msg["original_content"]
-                })
-
+                result.append({"role": msg["role"], "content": msg["original_content"]})
+
        return result
 
    def embed_fn(self, text: str) -> list:
@@ -366,17 +354,14 @@ class BotClient(object):
        Returns:
            list: List of responses from the AI Search service.
        """
-        headers = {
-            "Authorization": "Bearer " + self.qianfan_api_key,
-            "Content-Type": "application/json"
-        }
+        headers = {"Authorization": "Bearer " + self.qianfan_api_key, "Content-Type": "application/json"}
 
        results = []
        top_k = self.max_search_results_num // len(query_list)
        for query in query_list:
            payload = {
                "messages": [{"role": "user", "content": query}],
-                "resource_type_filter": [{"type": "web", "top_k": top_k}]
+                "resource_type_filter": [{"type": "web", "top_k": top_k}],
            }
            response = requests.post(self.web_search_service_url, headers=headers, json=payload)
 
@@ -387,4 +372,4 @@ class BotClient(object):
        else:
            self.logger.info(f"请求失败,状态码: {response.status_code}")
            self.logger.info(response.text)
-        return results
+        return results
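The retry logic in `process_stream` is the subtle part of this file: the `try/except` wraps the `yield from`, so a failure on any attempt falls through to the next loop iteration, and only after the final attempt does the generator emit a terminal `{"error": ...}` chunk, which `GradioEvents.chat_stream` then raises as an exception. A runnable sketch of that shape, with `fetch_chunks` as a hypothetical stand-in for `call_back_stream`:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

ATTEMPTS_BEFORE_SUCCESS = 2  # simulate two transient upstream failures


def fetch_chunks(attempt: int):
    """Hypothetical stand-in for BotClient.call_back_stream."""
    if attempt < ATTEMPTS_BEFORE_SUCCESS:
        raise ConnectionError("upstream dropped the stream")
    yield {"choices": [{"delta": {"content": "Hello"}}]}
    yield {"choices": [{"delta": {"content": ", world"}}]}


def stream_with_retry(max_retry_num: int = 3):
    last_error = None
    for attempt in range(max_retry_num):
        try:
            # On success the whole stream is forwarded and we return early;
            # on failure the except block records the error and we retry.
            yield from fetch_chunks(attempt)
            return
        except Exception as e:
            last_error = e
            logger.error(f"Stream request failed (attempt {attempt + 1}/{max_retry_num}): {e}")
    # All attempts failed: emit one terminal error chunk for the caller.
    yield {"error": str(last_error)}


for chunk in stream_with_retry():
    print(chunk)
```

One caveat of this pattern: if the upstream stream dies after yielding some chunks, a retry replays it from the beginning, so already-forwarded chunks are emitted a second time; callers that accumulate deltas, as `predict_stream` does, would see duplicated text in that case.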