Spaces:

docpro
/

AWEsumCare-Demo

Runtime error

ray committed on Dec 19, 2023

Commit

7a9ec21

1 Parent(s): 9021b39

v2

Files changed (3) hide show

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from llama_index.vector_stores.qdrant import QdrantVectorStore
 from llama_index.text_splitter import SentenceSplitter
 from llama_index.extractors import TitleExtractor
 from llama_index.ingestion import IngestionPipeline
-from chat_template import CHAT_TEXT_QA_PROMPT
 from schemas import ChatbotVersion, ServiceProvider
 from chatbot import Chatbot, IndexBuilder
 from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
@@ -29,7 +29,7 @@ llama_index.set_global_handler("arize_phoenix")
 openai.api_key = os.getenv("OPENAI_API_KEY")
 IS_LOAD_FROM_VECTOR_STORE = True
-VDB_COLLECTION_NAME = "demo-v1"
 MODEL_NAME = ChatbotVersion.CHATGPT_4.value
@@ -151,6 +151,7 @@ class AweSumCareContextChatbot(AwesumCareToolChatbot):
         self.chat_engine = self.index.as_chat_engine(
             chat_mode=ChatMode.CONTEXT,
             similarity_top_k=5,
             text_qa_template=CHAT_TEXT_QA_PROMPT)
 class AweSumCareSimpleChatbot(AwesumCareToolChatbot):

 from llama_index.text_splitter import SentenceSplitter
 from llama_index.extractors import TitleExtractor
 from llama_index.ingestion import IngestionPipeline
+from chat_template import CHAT_TEXT_QA_PROMPT, TEXT_QA_SYSTEM_PROMPT
 from schemas import ChatbotVersion, ServiceProvider
 from chatbot import Chatbot, IndexBuilder
 from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
 openai.api_key = os.getenv("OPENAI_API_KEY")
 IS_LOAD_FROM_VECTOR_STORE = True
+VDB_COLLECTION_NAME = "demo-v2"
 MODEL_NAME = ChatbotVersion.CHATGPT_4.value
         self.chat_engine = self.index.as_chat_engine(
             chat_mode=ChatMode.CONTEXT,
             similarity_top_k=5,
+            system_prompt=TEXT_QA_SYSTEM_PROMPT.content,
             text_qa_template=CHAT_TEXT_QA_PROMPT)
 class AweSumCareSimpleChatbot(AwesumCareToolChatbot):

chat_template.py CHANGED Viewed

@@ -8,7 +8,9 @@ TEXT_QA_SYSTEM_PROMPT = ChatMessage(
         "detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
         "Always answer queries using the context information provided, focusing on delivering "
         "accurate, comprehensive, and user-friendly responses.\n"
-        "當用戶用繁體中文時，請盡量使用繁體中文作答。"
     ),
     role=MessageRole.SYSTEM,
 )

         "detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
         "Always answer queries using the context information provided, focusing on delivering "
         "accurate, comprehensive, and user-friendly responses.\n"
+        "任何與安心三寶無關的問題, "
+        "please simply say: 很抱歉，身為安心三寶人工智能，我無法回答與安心三寶無關的內容。\n"
+        "當用戶用繁體中文時，使用繁體中文作答。"
     ),
     role=MessageRole.SYSTEM,
 )

custom_io.py CHANGED Viewed

@@ -8,6 +8,7 @@ from datetime import datetime
 import mimetypes
 import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 from llama_index.readers.base import BaseReader
@@ -80,9 +81,10 @@ def parse_knowledge_units(file_path):
     knowledge_units = []
     current_unit = ""
     for line in lines:
-        if line.strip() and line[0].isdigit() and '.' in line:
             if current_unit:
                 knowledge_units.append(current_unit.strip())
                 current_unit = ""
@@ -92,6 +94,17 @@ def parse_knowledge_units(file_path):
     if current_unit:
         knowledge_units.append(current_unit.strip())
     return knowledge_units

 import mimetypes
 import os
 from pathlib import Path
+import re
 from typing import Any, Dict, List, Optional
 from llama_index.readers.base import BaseReader
     knowledge_units = []
     current_unit = ""
+    unit_start_pattern = re.compile(r'^\d+\.\s')
     for line in lines:
+        stripped_line = line.strip()
+        if unit_start_pattern.match(stripped_line):
             if current_unit:
                 knowledge_units.append(current_unit.strip())
                 current_unit = ""
     if current_unit:
         knowledge_units.append(current_unit.strip())
+    # for line in lines:
+    #     if line.strip() and line[0].isdigit() and '.' in line:
+    #         if current_unit:
+    #             knowledge_units.append(current_unit.strip())
+    #             current_unit = ""
+    #         current_unit += line
+    #     else:
+    #         current_unit += line
+    # if current_unit:
+    #     knowledge_units.append(current_unit.strip())
     return knowledge_units