Spaces:
Runtime error
Runtime error
ray
committed on
Commit
·
7a9ec21
1
Parent(s):
9021b39
v2
Browse files
- app.py +3 -2
- chat_template.py +3 -1
- custom_io.py +15 -2
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from llama_index.vector_stores.qdrant import QdrantVectorStore
|
|
| 12 |
from llama_index.text_splitter import SentenceSplitter
|
| 13 |
from llama_index.extractors import TitleExtractor
|
| 14 |
from llama_index.ingestion import IngestionPipeline
|
| 15 |
-
from chat_template import CHAT_TEXT_QA_PROMPT
|
| 16 |
from schemas import ChatbotVersion, ServiceProvider
|
| 17 |
from chatbot import Chatbot, IndexBuilder
|
| 18 |
from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
|
|
@@ -29,7 +29,7 @@ llama_index.set_global_handler("arize_phoenix")
|
|
| 29 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 30 |
|
| 31 |
IS_LOAD_FROM_VECTOR_STORE = True
|
| 32 |
-
VDB_COLLECTION_NAME = "demo-
|
| 33 |
MODEL_NAME = ChatbotVersion.CHATGPT_4.value
|
| 34 |
|
| 35 |
|
|
@@ -151,6 +151,7 @@ class AweSumCareContextChatbot(AwesumCareToolChatbot):
|
|
| 151 |
self.chat_engine = self.index.as_chat_engine(
|
| 152 |
chat_mode=ChatMode.CONTEXT,
|
| 153 |
similarity_top_k=5,
|
|
|
|
| 154 |
text_qa_template=CHAT_TEXT_QA_PROMPT)
|
| 155 |
|
| 156 |
class AweSumCareSimpleChatbot(AwesumCareToolChatbot):
|
|
|
|
| 12 |
from llama_index.text_splitter import SentenceSplitter
|
| 13 |
from llama_index.extractors import TitleExtractor
|
| 14 |
from llama_index.ingestion import IngestionPipeline
|
| 15 |
+
from chat_template import CHAT_TEXT_QA_PROMPT, TEXT_QA_SYSTEM_PROMPT
|
| 16 |
from schemas import ChatbotVersion, ServiceProvider
|
| 17 |
from chatbot import Chatbot, IndexBuilder
|
| 18 |
from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
|
|
|
|
| 29 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 30 |
|
| 31 |
IS_LOAD_FROM_VECTOR_STORE = True
|
| 32 |
+
VDB_COLLECTION_NAME = "demo-v2"
|
| 33 |
MODEL_NAME = ChatbotVersion.CHATGPT_4.value
|
| 34 |
|
| 35 |
|
|
|
|
| 151 |
self.chat_engine = self.index.as_chat_engine(
|
| 152 |
chat_mode=ChatMode.CONTEXT,
|
| 153 |
similarity_top_k=5,
|
| 154 |
+
system_prompt=TEXT_QA_SYSTEM_PROMPT.content,
|
| 155 |
text_qa_template=CHAT_TEXT_QA_PROMPT)
|
| 156 |
|
| 157 |
class AweSumCareSimpleChatbot(AwesumCareToolChatbot):
|
chat_template.py
CHANGED
|
@@ -8,7 +8,9 @@ TEXT_QA_SYSTEM_PROMPT = ChatMessage(
|
|
| 8 |
"detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
|
| 9 |
"Always answer queries using the context information provided, focusing on delivering "
|
| 10 |
"accurate, comprehensive, and user-friendly responses.\n"
|
| 11 |
-
"
|
|
|
|
|
|
|
| 12 |
),
|
| 13 |
role=MessageRole.SYSTEM,
|
| 14 |
)
|
|
|
|
| 8 |
"detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
|
| 9 |
"Always answer queries using the context information provided, focusing on delivering "
|
| 10 |
"accurate, comprehensive, and user-friendly responses.\n"
|
| 11 |
+
"任何與安心三寶無關的問題, "
|
| 12 |
+
"please simply say: 很抱歉,身為安心三寶人工智能,我無法回答與安心三寶無關的內容。\n"
|
| 13 |
+
"當用戶用繁體中文時,使用繁體中文作答。"
|
| 14 |
),
|
| 15 |
role=MessageRole.SYSTEM,
|
| 16 |
)
|
custom_io.py
CHANGED
|
@@ -8,6 +8,7 @@ from datetime import datetime
|
|
| 8 |
import mimetypes
|
| 9 |
import os
|
| 10 |
from pathlib import Path
|
|
|
|
| 11 |
from typing import Any, Dict, List, Optional
|
| 12 |
|
| 13 |
from llama_index.readers.base import BaseReader
|
|
@@ -80,9 +81,10 @@ def parse_knowledge_units(file_path):
|
|
| 80 |
|
| 81 |
knowledge_units = []
|
| 82 |
current_unit = ""
|
| 83 |
-
|
| 84 |
for line in lines:
|
| 85 |
-
|
|
|
|
| 86 |
if current_unit:
|
| 87 |
knowledge_units.append(current_unit.strip())
|
| 88 |
current_unit = ""
|
|
@@ -92,6 +94,17 @@ def parse_knowledge_units(file_path):
|
|
| 92 |
|
| 93 |
if current_unit:
|
| 94 |
knowledge_units.append(current_unit.strip())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
return knowledge_units
|
| 97 |
|
|
|
|
| 8 |
import mimetypes
|
| 9 |
import os
|
| 10 |
from pathlib import Path
|
| 11 |
+
import re
|
| 12 |
from typing import Any, Dict, List, Optional
|
| 13 |
|
| 14 |
from llama_index.readers.base import BaseReader
|
|
|
|
| 81 |
|
| 82 |
knowledge_units = []
|
| 83 |
current_unit = ""
|
| 84 |
+
unit_start_pattern = re.compile(r'^\d+\.\s')
|
| 85 |
for line in lines:
|
| 86 |
+
stripped_line = line.strip()
|
| 87 |
+
if unit_start_pattern.match(stripped_line):
|
| 88 |
if current_unit:
|
| 89 |
knowledge_units.append(current_unit.strip())
|
| 90 |
current_unit = ""
|
|
|
|
| 94 |
|
| 95 |
if current_unit:
|
| 96 |
knowledge_units.append(current_unit.strip())
|
| 97 |
+
# for line in lines:
|
| 98 |
+
# if line.strip() and line[0].isdigit() and '.' in line:
|
| 99 |
+
# if current_unit:
|
| 100 |
+
# knowledge_units.append(current_unit.strip())
|
| 101 |
+
# current_unit = ""
|
| 102 |
+
# current_unit += line
|
| 103 |
+
# else:
|
| 104 |
+
# current_unit += line
|
| 105 |
+
|
| 106 |
+
# if current_unit:
|
| 107 |
+
# knowledge_units.append(current_unit.strip())
|
| 108 |
|
| 109 |
return knowledge_units
|
| 110 |
|