maxiaolong03
committed on
Commit · 4d5ee59
1 Parent(s): b5e5daf
add files
Browse files
- app.py +152 -38
- bot_requests.py +88 -77
app.py
CHANGED

@@ -109,14 +109,25 @@ def get_args() -> argparse.Namespace:
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
 
-    parser.add_argument(
-
-
-    parser.add_argument(
+    parser.add_argument(
+        "--server-port", type=int, default=7860, help="Demo server port."
+    )
+    parser.add_argument(
+        "--server-name", type=str, default="0.0.0.0", help="Demo server name."
+    )
+    parser.add_argument(
+        "--max_char",
+        type=int,
+        default=20000,
+        help="Maximum character limit for messages.",
+    )
+    parser.add_argument(
+        "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
+    )
     parser.add_argument(
         "--model_map",
         type=str,
-        default=
+        default='{"ernie-4.5-turbo-128k-preview": "https://qianfan.baidubce.com/v2"}',
         help="""JSON string defining model name to endpoint mappings.
 Required Format:
     {"ERNIE-4.5": "http://localhost:port/v1"}
@@ -129,15 +140,56 @@ def get_args() -> argparse.Namespace:
 """,
     )
     parser.add_argument(
-        "--embedding_service_url",
+        "--embedding_service_url",
+        type=str,
+        default="https://qianfan.baidubce.com/v2",
+        help="Embedding service url.",
+    )
+    parser.add_argument(
+        "--qianfan_api_key",
+        type=str,
+        default=os.environ.get("API_KEY"),
+        help="Qianfan API key.",
+    )
+    parser.add_argument(
+        "--embedding_model",
+        type=str,
+        default="embedding-v1",
+        help="Embedding model name.",
+    )
+    parser.add_argument(
+        "--embedding_dim",
+        type=int,
+        default=384,
+        help="Dimension of the embedding vector.",
+    )
+    parser.add_argument(
+        "--chunk_size",
+        type=int,
+        default=512,
+        help="Chunk size for splitting long documents.",
+    )
+    parser.add_argument(
+        "--top_k", type=int, default=3, help="Top k results to retrieve."
+    )
+    parser.add_argument(
+        "--faiss_index_path",
+        type=str,
+        default="data/faiss_index",
+        help="Faiss index path.",
+    )
+    parser.add_argument(
+        "--text_db_path",
+        type=str,
+        default="data/text_db.jsonl",
+        help="Text database path.",
+    )
+    parser.add_argument(
+        "--concurrency_limit", type=int, default=10, help="Default concurrency limit."
+    )
+    parser.add_argument(
+        "--max_queue_size", type=int, default=50, help="Maximum queue size for request."
     )
-    parser.add_argument("--qianfan_api_key", type=str, default=os.environ.get("API_KEY"), help="Qianfan API key.")
-    parser.add_argument("--embedding_model", type=str, default="embedding-v1", help="Embedding model name.")
-    parser.add_argument("--embedding_dim", type=int, default=384, help="Dimension of the embedding vector.")
-    parser.add_argument("--chunk_size", type=int, default=512, help="Chunk size for splitting long documents.")
-    parser.add_argument("--top_k", type=int, default=3, help="Top k results to retrieve.")
-    parser.add_argument("--faiss_index_path", type=str, default="data/faiss_index", help="Faiss index path.")
-    parser.add_argument("--text_db_path", type=str, default="data/text_db.jsonl", help="Text database path.")
 
     args = parser.parse_args()
     try:
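Note that the new `--model_map` default is a JSON string, so the `args = parser.parse_args()` / `try:` tail of this hunk presumably decodes it before use. A minimal sketch of that step (assumed; the decoding code itself is outside the visible diff):

    import json

    args = parser.parse_args()
    try:
        # e.g. '{"ernie-4.5-turbo-128k-preview": "https://qianfan.baidubce.com/v2"}'
        model_map = json.loads(args.model_map)  # model name -> endpoint URL
    except json.JSONDecodeError as e:
        raise ValueError(f"--model_map must be valid JSON: {e}")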
@@ -179,11 +231,14 @@ class FaissTextDatabase:
         # If faiss_index_path exists, load it and text_db_path
         if os.path.exists(self.faiss_index_path) and os.path.exists(self.text_db_path):
             self.index = faiss.read_index(self.faiss_index_path)
-            with open(self.text_db_path,
+            with open(self.text_db_path, "r", encoding="utf-8") as f:
                 self.text_db = json.load(f)
         else:
             self.index = faiss.IndexFlatIP(self.embedding_dim)
-            self.text_db = {
+            self.text_db = {
+                "file_md5s": [],
+                "chunks": [],
+            }  # Save file_md5s to avoid duplicates  # Save chunks
 
     def calculate_md5(self, file_path: str) -> str:
         """
@@ -212,7 +267,11 @@ class FaissTextDatabase:
         return file_md5 in self.text_db["file_md5s"]
 
     def add_embeddings(
-        self,
+        self,
+        file_path: str,
+        segments: list[str],
+        progress_bar: gr.Progress = None,
+        save_file: bool = False,
     ) -> bool:
         """
         Stores document embeddings in FAISS database after checking for duplicates.
@@ -241,7 +300,7 @@ class FaissTextDatabase:
             if progress_bar is not None:
                 progress_bar((i + 1) / len(segments), desc=file_name + " Processing...")
         vectors = np.array(vectors)
-        self.index.add(vectors.astype(
+        self.index.add(vectors.astype("float32"))
 
         start_id = len(self.text_db["chunks"])
         for i, text in enumerate(segments):
@@ -275,7 +334,7 @@ class FaissTextDatabase:
         # Step 1: Retrieve top_k results for each query and collect all indices
         all_indices = []
         for query in query_list:
-            query_vector = np.array([self.bot_client.embed_fn(query)]).astype(
+            query_vector = np.array([self.bot_client.embed_fn(query)]).astype("float32")
             _, indices = self.index.search(query_vector, self.top_k)
             all_indices.extend(indices[0].tolist())
 
@@ -293,12 +352,17 @@ class FaissTextDatabase:
 
         if target_file_md5 not in file_boundaries:
             file_start = target_idx
-            while
+            while (
+                file_start > 0
+                and self.text_db["chunks"][file_start - 1]["file_md5"]
+                == target_file_md5
+            ):
                 file_start -= 1
             file_end = target_idx
             while (
                 file_end < len(self.text_db["chunks"]) - 1
-                and self.text_db["chunks"][file_end + 1]["file_md5"]
+                and self.text_db["chunks"][file_end + 1]["file_md5"]
+                == target_file_md5
             ):
                 file_end += 1
         else:
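The two widened `while` loops locate the full span of chunks sharing the target chunk's `file_md5`, i.e. the boundaries of the source file inside the flat `chunks` list. A toy illustration of the same scan (standalone sketch, not the committed code):

    chunks = [{"file_md5": m} for m in ["a", "a", "b", "b", "b", "c"]]
    target_idx, target_file_md5 = 3, "b"

    file_start = target_idx
    while file_start > 0 and chunks[file_start - 1]["file_md5"] == target_file_md5:
        file_start -= 1
    file_end = target_idx
    while file_end < len(chunks) - 1 and chunks[file_end + 1]["file_md5"] == target_file_md5:
        file_end += 1

    assert (file_start, file_end) == (2, 4)  # file "b" spans chunks 2..4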
@@ -330,7 +394,9 @@ class FaissTextDatabase:
         # Step 5: Create merged text for each group
         result = ""
         for idx, group in enumerate(groups):
-            result += "\n段落{idx}:\n{title}\n".format(
+            result += "\n段落{idx}:\n{title}\n".format(
+                idx=idx + 1, title=self.text_db["chunks"][group[0]]["file_txt"]
+            )
             for idx in group:
                 result += self.text_db["chunks"][idx]["text"] + "\n"
             self.logger.info(f"Merged chunk range: {group[0]}-{group[-1]}")
@@ -341,7 +407,7 @@ class FaissTextDatabase:
         """Save the database to disk."""
         faiss.write_index(self.index, self.faiss_index_path)
 
-        with open(self.text_db_path,
+        with open(self.text_db_path, "w", encoding="utf-8") as f:
             json.dump(self.text_db, f, ensure_ascii=False, indent=2)
 
 
@@ -396,19 +462,26 @@ class GradioEvents:
         Yields:
             dict: A dictionary containing the event type and its corresponding content.
         """
-        conversation, conversation_str = GradioEvents.get_history_conversation(
+        conversation, conversation_str = GradioEvents.get_history_conversation(
+            task_history
+        )
         conversation_str += f"user:\n{query}\n"
 
         search_info_message = QUERY_REWRITE_PROMPT.format(
-            TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            CONVERSATION=conversation_str,
         )
         search_conversation = [{"role": "user", "content": search_info_message}]
-        search_info_result = GradioEvents.get_sub_query(
+        search_info_result = GradioEvents.get_sub_query(
+            search_conversation, model, bot_client
+        )
         if search_info_result is None:
             search_info_result = {"query": [query]}
 
         if search_info_result.get("query", []):
-            relevant_passages = faiss_db.search_with_context(
+            relevant_passages = faiss_db.search_with_context(
+                search_info_result["query"]
+            )
             yield {"type": "relevant_passage", "content": relevant_passages}
 
         query = ANSWER_PROMPT.format(
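Taken together, this hunk wires up the retrieval flow: rewrite the conversation into standalone sub-queries, retrieve context for them, then ground the answer prompt. Roughly (a sketch using names from the diff; arguments elided where the hunk truncates):

    # 1. LLM-based query rewrite (falls back to the raw query on failure)
    search_info_result = GradioEvents.get_sub_query(search_conversation, model, bot_client)
    queries = (search_info_result or {"query": [query]})["query"]

    # 2. FAISS retrieval with neighbouring-chunk context
    relevant_passages = faiss_db.search_with_context(queries)

    # 3. Final prompt grounded on the retrieved passages (arguments elided in the hunk)
    query = ANSWER_PROMPT.format(...)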
@@ -562,7 +635,9 @@ class GradioEvents:
         """
         GradioEvents.gc()
 
-        reset_result = namedtuple(
+        reset_result = namedtuple(
+            "reset_result", ["chatbot", "task_history", "file_btn", "relevant_passage"]
+        )
         return reset_result(
             [],  # clear chatbot
             [],  # clear task_history
@@ -600,7 +675,9 @@ class GradioEvents:
             return url
 
     @staticmethod
-    def get_sub_query(
+    def get_sub_query(
+        conversation: list, model_name: str, bot_client: BotClient
+    ) -> dict:
         """
         Enhances user queries by generating alternative phrasings using language models.
         Creates semantically similar variations of the original query to improve retrieval accuracy.
@@ -644,7 +721,20 @@ class GradioEvents:
         Returns:
             tuple: Two strings, the first part of the original line and the rest of the line.
         """
-        PUNCTUATIONS = {
+        PUNCTUATIONS = {
+            ".",
+            "。",
+            "!",
+            "!",
+            "?",
+            "?",
+            ",",
+            ",",
+            ";",
+            ";",
+            ":",
+            ":",
+        }
 
         if len(line) <= chunk_size:
             return line, ""
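`PUNCTUATIONS` pairs ASCII and full-width sentence marks; per its docstring, the surrounding function splits an over-long line into a head within `chunk_size` and a tail. A plausible shape for the rest of that body (a sketch assuming the set above, not the committed code):

    # Scan backwards from the chunk_size limit for the nearest sentence boundary.
    for i in range(chunk_size - 1, -1, -1):
        if line[i] in PUNCTUATIONS:
            return line[: i + 1], line[i + 1 :]
    return line[:chunk_size], line[chunk_size:]  # hard split if no punctuation found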
@@ -711,7 +801,10 @@ class GradioEvents:
 
     @staticmethod
     def file_upload(
-        files_url: list,
+        files_url: list,
+        chunk_size: int,
+        faiss_db: FaissTextDatabase,
+        progress_bar: gr.Progress = gr.Progress(),
     ) -> str:
         """
         Uploads and processes multiple files by splitting them into semantically meaningful chunks,
@@ -730,7 +823,9 @@ class GradioEvents:
             return
         yield gr.update(visible=True)
         for file_url in files_url:
-            if not GradioEvents.save_file_to_db(
+            if not GradioEvents.save_file_to_db(
+                file_url, chunk_size, faiss_db, progress_bar
+            ):
                 file_name = os.path.basename(file_url)
                 gr.Info(f"{file_name} already processed.")
 
@@ -779,7 +874,11 @@
         return False
 
 
-def launch_demo(
+def launch_demo(
+    args: argparse.Namespace,
+    bot_client: BotClient,
+    faiss_db_template: FaissTextDatabase,
+):
     """
     Launch demo program
 
@@ -843,7 +942,11 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
                 file_count="multiple",
             )
             relevant_passage = gr.Textbox(
-                label="Relevant Passage",
+                label="Relevant Passage",
+                lines=5,
+                max_lines=5,
+                placeholder=RELEVANT_PASSAGE_DEFAULT,
+                interactive=False,
             )
         with gr.Row():
             progress_bar = gr.Textbox(label="Progress", visible=False)
@@ -857,8 +960,12 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
 
         task_history = gr.State([])
 
-        predict_with_clients = partial(
-
+        predict_with_clients = partial(
+            GradioEvents.predict_stream, bot_client=bot_client
+        )
+        regenerate_with_clients = partial(
+            GradioEvents.regenerate, bot_client=bot_client
+        )
         file_upload_with_clients = partial(
             GradioEvents.file_upload,
         )
@@ -884,7 +991,9 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
         )
         submit_btn.click(GradioEvents.reset_user_input, [], [query])
         empty_btn.click(
-            GradioEvents.reset_state,
+            GradioEvents.reset_state,
+            outputs=[chatbot, task_history, file_btn, relevant_passage],
+            show_progress=True,
         )
         regen_btn.click(
             regenerate_with_clients,
@@ -893,7 +1002,10 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
             show_progress=True,
         )
 
-    demo.queue(
+    demo.queue(
+        default_concurrency_limit=args.concurrency_limit, max_size=args.max_queue_size
+    )
+    demo.launch(server_port=args.server_port, server_name=args.server_name)
 
 
 def main():
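With `demo.queue(...)` and `demo.launch(...)` now driven by flags, a typical invocation would look like this (hypothetical values; flag names are from the parser added in this commit):

    python app.py \
        --model_map '{"ERNIE-4.5": "http://localhost:8180/v1"}' \
        --server-name 0.0.0.0 --server-port 7860 \
        --concurrency_limit 10 --max_queue_size 50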
@@ -903,7 +1015,9 @@ def main():
     faiss_db = FaissTextDatabase(args, bot_client)
 
     # Run file upload function to save default knowledge base.
-    GradioEvents.save_file_to_db(
+    GradioEvents.save_file_to_db(
+        FILE_URL_DEFAULT, args.chunk_size, faiss_db, save_file=True
+    )
 
     launch_demo(args, bot_client, faiss_db)
bot_requests.py
CHANGED

@@ -16,20 +16,22 @@
 
 import os
 import argparse
+import json
 import logging
 import traceback
-
+
 import jieba
+import requests
 from openai import OpenAI
 
-import requests
 
-class BotClient(object):
+class BotClient:
     """Client for interacting with various AI models."""
+
     def __init__(self, args: argparse.Namespace):
         """
-        Initializes the BotClient instance by configuring essential parameters from command line arguments
-        including retry limits, character constraints, model endpoints and API credentials while setting up
+        Initializes the BotClient instance by configuring essential parameters from command line arguments
+        including retry limits, character constraints, model endpoints and API credentials while setting up
         default values for missing arguments to ensure robust operation.
 
         Args:
@@ -37,25 +39,29 @@ class BotClient(object):
             Uses getattr() to safely retrieve values with fallback defaults.
         """
         self.logger = logging.getLogger(__name__)
-
-        self.max_retry_num = getattr(args, 'max_retry_num', 3)
-        self.max_char = getattr(args, 'max_char', 8000)
 
-        self.
+        self.max_retry_num = getattr(args, "max_retry_num", 3)
+        self.max_char = getattr(args, "max_char", 8000)
+
+        self.model_map = getattr(args, "model_map", {})
         self.api_key = os.environ.get("API_KEY")
 
-        self.embedding_service_url = getattr(
-
+        self.embedding_service_url = getattr(
+            args, "embedding_service_url", "embedding_service_url"
+        )
+        self.embedding_model = getattr(args, "embedding_model", "embedding_model")
 
-        self.web_search_service_url = getattr(
-
+        self.web_search_service_url = getattr(
+            args, "web_search_service_url", "web_search_service_url"
+        )
+        self.max_search_results_num = getattr(args, "max_search_results_num", 15)
 
         self.qianfan_api_key = os.environ.get("API_KEY")
 
     def call_back(self, host_url: str, req_data: dict) -> dict:
         """
-        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
-        conversion to a compatible dictionary format, and manages any exceptions that may occur during
+        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
+        conversion to a compatible dictionary format, and manages any exceptions that may occur during
         the request process while logging errors appropriately.
 
         Args:
@@ -68,20 +74,18 @@ class BotClient(object):
         """
         try:
             client = OpenAI(base_url=host_url, api_key=self.api_key)
-            response = client.chat.completions.create(
-
-            )
-
+            response = client.chat.completions.create(**req_data)
+
             # Convert OpenAI response to compatible format
             return response.model_dump()
 
         except Exception as e:
-            self.logger.error("Stream request failed: {}"
+            self.logger.error(f"Stream request failed: {e}")
             raise
 
     def call_back_stream(self, host_url: str, req_data: dict) -> dict:
         """
-        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
+        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
         in real-time while handling any exceptions that may occur during the streaming process.
 
         Args:
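Since `call_back` now forwards `req_data` verbatim via `client.chat.completions.create(**req_data)`, the dict must carry OpenAI chat-completion parameter names. A usage sketch (the `bot` instance, model name, and URL are illustrative):

    req_data = {
        "model": "ERNIE-4.5",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 2048,
        "temperature": 1.0,
        "top_p": 0.7,
    }
    result = bot.call_back("http://localhost:8180/v1", req_data)
    print(result["choices"][0]["message"]["content"])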
@@ -100,25 +104,25 @@ class BotClient(object):
             for chunk in response:
                 if not chunk.choices:
                     continue
-
+
                 # Convert OpenAI response to compatible format
                 yield chunk.model_dump()
 
         except Exception as e:
-            self.logger.error("Stream request failed: {}"
+            self.logger.error(f"Stream request failed: {e}")
             raise
 
     def process(
-        self,
-        model_name: str,
-        req_data: dict,
-        max_tokens: int=2048,
-        temperature: float=1.0,
-        top_p: float=0.7
+        self,
+        model_name: str,
+        req_data: dict,
+        max_tokens: int = 2048,
+        temperature: float = 1.0,
+        top_p: float = 0.7,
     ) -> dict:
         """
-        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
-        including token limits and sampling settings, truncating messages to fit character limits, making API calls
+        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
+        including token limits and sampling settings, truncating messages to fit character limits, making API calls
         with built-in retry mechanism, and logging the full request/response cycle for debugging purposes.
 
         Args:
@@ -140,7 +144,7 @@ class BotClient(object):
         req_data["messages"] = self.truncate_messages(req_data["messages"])
         for _ in range(self.max_retry_num):
             try:
-                self.logger.info("[MODEL] {}"
+                self.logger.info(f"[MODEL] {model_url}")
                 self.logger.info("[req_data]====>")
                 self.logger.info(json.dumps(req_data, ensure_ascii=False))
                 res = self.call_back(model_url, req_data)
@@ -153,15 +157,16 @@ class BotClient(object):
                 res = {}
             if len(res) != 0 and "error" not in res:
                 break
-
+
         return res
 
     def process_stream(
-        self,
-
-
-
-
+        self,
+        model_name: str,
+        req_data: dict,
+        max_tokens: int = 2048,
+        temperature: float = 1.0,
+        top_p: float = 0.7,
     ) -> dict:
         """
         Processes streaming requests by mapping the model name to its endpoint, configuring request parameters,
@@ -184,29 +189,30 @@ class BotClient(object):
         req_data["temperature"] = temperature
         req_data["top_p"] = top_p
         req_data["messages"] = self.truncate_messages(req_data["messages"])
-
+
         last_error = None
         for _ in range(self.max_retry_num):
             try:
-                self.logger.info("[MODEL] {}"
+                self.logger.info(f"[MODEL] {model_url}")
                 self.logger.info("[req_data]====>")
                 self.logger.info(json.dumps(req_data, ensure_ascii=False))
-
-
-                    yield chunk
+
+                yield from self.call_back_stream(model_url, req_data)
                 return
-
+
             except Exception as e:
                 last_error = e
-                self.logger.error(
-
+                self.logger.error(
+                    f"Stream request failed (attempt {_ + 1}/{self.max_retry_num}): {e}"
+                )
+
         self.logger.error("All retry attempts failed for stream request")
         yield {"error": str(last_error)}
 
     def cut_chinese_english(self, text: str) -> list:
         """
-        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
-        while preserving English words as whole units, with special handling for Unicode character ranges
+        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
+        while preserving English words as whole units, with special handling for Unicode character ranges
         to distinguish between the two languages.
 
         Args:
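Because `process_stream` now `yield from`s the backing stream and, after exhausting retries, emits a final `{"error": ...}` dict instead of raising, callers should inspect each chunk. A consumption sketch (hypothetical `bot` instance, assuming OpenAI-style chunk payloads):

    req = {"messages": [{"role": "user", "content": "Hello"}]}
    for chunk in bot.process_stream("ERNIE-4.5", req):
        if "error" in chunk:
            print("stream failed:", chunk["error"])
            break
        delta = chunk["choices"][0]["delta"].get("content") or ""
        print(delta, end="", flush=True)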
@@ -219,7 +225,9 @@ class BotClient(object):
         en_ch_words = []
 
         for word in words:
-            if word.isalpha() and not any(
+            if word.isalpha() and not any(
+                "\u4e00" <= char <= "\u9fff" for char in word
+            ):
                 en_ch_words.append(word)
             else:
                 en_ch_words.extend(list(word))
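The `"\u4e00" <= char <= "\u9fff"` test keeps purely alphabetic non-CJK tokens whole and splits anything containing CJK characters into single characters. For example (exact output depends on jieba's segmentation):

    bot.cut_chinese_english("ERNIE模型")
    # jieba plausibly yields ["ERNIE", "模型"]; "ERNIE" passes the isalpha/non-CJK
    # check and stays whole, while "模型" is exploded: ["ERNIE", "模", "型"]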
@@ -239,10 +247,10 @@ class BotClient(object):
         """
         if not messages:
             return messages
-
+
         processed = []
         total_units = 0
-
+
         for msg in messages:
             # Handle two different content formats
             if isinstance(msg["content"], str):
@@ -251,31 +259,33 @@ class BotClient(object):
                 text_content = msg["content"][1]["text"]
             else:
                 text_content = ""
-
+
             # Calculate unit count after tokenization
             units = self.cut_chinese_english(text_content)
             unit_count = len(units)
-
-            processed.append(
-
-
-
-
-
-
+
+            processed.append(
+                {
+                    "role": msg["role"],
+                    "original_content": msg["content"],  # Preserve original content
+                    "text_content": text_content,  # Extracted plain text
+                    "units": units,
+                    "unit_count": unit_count,
+                }
+            )
             total_units += unit_count
-
+
         if total_units <= self.max_char:
             return messages
-
+
         # Number of units to remove
         to_remove = total_units - self.max_char
-
+
         # 1. Truncate historical messages
         for i in range(len(processed) - 1, 1):
             if to_remove <= 0:
                 break
-
+
             # current = processed[i]
             if processed[i]["unit_count"] <= to_remove:
                 processed[i]["text_content"] = ""
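Each message is wrapped in a bookkeeping record so the truncation passes can shrink `text_content` while keeping `original_content` intact for the final rebuild. Schematically, one entry looks like:

    {
        "role": "user",
        "original_content": "...",   # untouched original (str or list form)
        "text_content": "...",       # working copy that truncation edits
        "units": [...],              # units from cut_chinese_english
        "unit_count": 0,             # len(units), budgeted against max_char
    }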
@@ -293,7 +303,7 @@ class BotClient(object):
             elif isinstance(processed[i]["original_content"], list):
                 processed[i]["original_content"][1]["text"] = new_text
             to_remove = 0
-
+
         # 2. Truncate system message
         if to_remove > 0:
             system_msg = processed[0]
@@ -313,7 +323,7 @@ class BotClient(object):
             elif isinstance(processed[0]["original_content"], list):
                 processed[0]["original_content"][1]["text"] = new_text
             to_remove = 0
-
+
         # 3. Truncate last message
         if to_remove > 0 and len(processed) > 1:
             last_msg = processed[-1]
@@ -331,15 +341,12 @@ class BotClient(object):
                 last_msg["original_content"] = ""
             elif isinstance(last_msg["original_content"], list):
                 last_msg["original_content"][1]["text"] = ""
-
+
         result = []
         for msg in processed:
             if msg["text_content"]:
-                result.append({
-
-                    "content": msg["original_content"]
-                })
-
+                result.append({"role": msg["role"], "content": msg["original_content"]})
+
         return result
 
     def embed_fn(self, text: str) -> list:
@@ -352,7 +359,9 @@ class BotClient(object):
         Returns:
             list: A list of floats representing the embedding.
         """
-        client = OpenAI(
+        client = OpenAI(
+            base_url=self.embedding_service_url, api_key=self.qianfan_api_key
+        )
         response = client.embeddings.create(input=[text], model=self.embedding_model)
         return response.data[0].embedding
 
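`embed_fn` returns a plain `list[float]`; the FAISS side (see the app.py hunks above) converts it to a float32 row before searching the `IndexFlatIP` inner-product index. A usage sketch tying the two together (hypothetical `bot` and `faiss_db` instances):

    import numpy as np

    vec = bot.embed_fn("What is ERNIE?")            # length == embedding_dim
    query = np.array([vec], dtype="float32")        # FAISS wants 2-D float32
    scores, ids = faiss_db.index.search(query, 3)   # top-3 inner-product hits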
@@ -368,7 +377,7 @@ class BotClient(object):
         """
         headers = {
             "Authorization": "Bearer " + self.qianfan_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
 
         results = []
@@ -376,9 +385,11 @@ class BotClient(object):
         for query in query_list:
             payload = {
                 "messages": [{"role": "user", "content": query}],
-                "resource_type_filter": [{"type": "web", "top_k": top_k}]
+                "resource_type_filter": [{"type": "web", "top_k": top_k}],
             }
-            response = requests.post(
+            response = requests.post(
+                self.web_search_service_url, headers=headers, json=payload
+            )
 
             if response.status_code == 200:
                 response = response.json()
@@ -387,4 +398,4 @@ class BotClient(object):
             else:
                 self.logger.info(f"请求失败,状态码: {response.status_code}")
                 self.logger.info(response.text)
-            return results
+        return results