Update app.py
app.py
CHANGED
@@ -1,134 +1,170 @@
-__import__('pysqlite3')
-import sys
-sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-
-# DATABASES = {
-#     'default': {
-#         'ENGINE': 'django.db.backends.sqlite3',
-#         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
-#     }
-# }
-import streamlit as st
-from huggingface_hub import InferenceClient
-from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
-from llama_index.vector_stores.chroma import ChromaVectorStore
-from llama_index.core import StorageContext
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores import Chroma
-import chromadb
-from langchain.memory import ConversationBufferMemory
-
-# Set page config
-st.set_page_config(page_title="
-
-# Set your Hugging Face token here
-HF_TOKEN = st.secrets["HF_TOKEN"]
-
-# Initialize your models, databases, and other components here
-@st.cache_resource
-def init_chroma():
-
-@st.cache_resource
-def init_vectorstore():
+# Work around the host's older bundled sqlite3 (chromadb needs a newer build) by aliasing pysqlite3
+__import__('pysqlite3')
+import sys
+sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+# DATABASES = {
+#     'default': {
+#         'ENGINE': 'django.db.backends.sqlite3',
+#         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+#     }
+# }
+import streamlit as st
+import pandas as pd  # needed by setup_vector() below
+from huggingface_hub import InferenceClient
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
+from llama_index.vector_stores.chroma import ChromaVectorStore
+from llama_index.core import StorageContext
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.docstore.document import Document  # needed by setup_vector() below
+import chromadb
+from langchain.memory import ConversationBufferMemory
+
+
+# Set page config
+st.set_page_config(page_title="MBAL Chatbot", page_icon="🛡️", layout="wide")
+
+# Set your Hugging Face token here (stored as a Space secret)
+HF_TOKEN = st.secrets["HF_TOKEN"]
+
+# Initialize your models, databases, and other components here
+
# @st.cache_resource
|
33 |
+
# def init_chroma():
|
34 |
+
# persist_directory = "chroma_db"
|
35 |
+
# chroma_client = chromadb.PersistentClient(path=persist_directory)
|
36 |
+
# chroma_collection = chroma_client.get_or_create_collection("my_collection")
|
37 |
+
# return chroma_client, chroma_collection
|
38 |
+
|
39 |
+
# @st.cache_resource
|
40 |
+
# def init_vectorstore():
|
41 |
+
# persist_directory = "chroma_db"
|
42 |
+
# embeddings = HuggingFaceEmbeddings()
|
43 |
+
# vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
|
44 |
+
# return vectorstore
|
45 |
+
@st.cache_resource
|
46 |
+
def setup_vector():
|
47 |
+
# Đọc dữ liệu từ file Excel
|
48 |
+
df = pd.read_excel("chunk_metadata_template.xlsx")
|
49 |
+
chunks = []
|
50 |
+
|
51 |
+
# Tạo danh sách các Document có metadata
|
52 |
+
for _, row in df.iterrows():
|
53 |
+
chunk_with_metadata = Document(
|
54 |
+
page_content=row['page_content'],
|
55 |
+
metadata={
|
56 |
+
'chunk_id': row['chunk_id'],
|
57 |
+
'document_title': row['document_title'],
|
58 |
+
'topic': row['topic'],
|
59 |
+
'access': row['access']
|
60 |
+
}
|
61 |
+
)
|
62 |
+
chunks.append(chunk_with_metadata)
|
63 |
+
|
64 |
+
# Khởi tạo embedding
|
65 |
+
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
|
66 |
+
|
67 |
+
# Khởi tạo hoặc ghi vào vectorstore đã tồn tại
|
68 |
+
persist_directory = "chroma_db"
|
69 |
+
collection_name = "my_collection"
|
70 |
+
|
71 |
+
# Tạo vectorstore từ dữ liệu và ghi vào Chroma
|
72 |
+
vectorstore = Chroma.from_documents(
|
73 |
+
documents=chunks,
|
74 |
+
embedding=embeddings,
|
75 |
+
persist_directory=persist_directory,
|
76 |
+
collection_name=collection_name
|
77 |
+
)
|
78 |
+
|
79 |
+
# Ghi xuống đĩa để đảm bảo dữ liệu được lưu
|
80 |
+
vectorstore.persist()
|
81 |
+
|
82 |
+
return vectorstore
|
83 |
+
|
+# Initialize components
+client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
+# init_chroma()/init_vectorstore() are commented out above, so build the store via setup_vector()
+vectorstore = setup_vector()
+
+# Initialize memory buffer
+memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+def rag_query(query):
+    # Retrieve relevant documents using similarity search
+    retrieved_docs = vectorstore.similarity_search(query, k=3)
+
+    # Prepare context for the chat model
+    if retrieved_docs:
+        context = "\n".join([doc.page_content for doc in retrieved_docs])
+    else:
+        context = ""
+
+    # Append new interaction to memory
+    memory.chat_memory.add_user_message(query)
+
+    # Retrieve past interactions for context
+    past_interactions = memory.load_memory_variables({})[memory.memory_key]
+    context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
+
+    # Debugging: Display context and past interactions
+    # st.write("Debugging Info:")
+    # st.write("Context Sent to Model:", context_with_memory)
+    # st.write("Retrieved Documents:", [doc.page_content for doc in retrieved_docs])
+    # st.write("Past Interactions:", past_interactions)
+
+    # Generate response using the chat model
+    messages = [
+        {"role": "user", "content": f"""Bạn là một chuyên viên tư vấn cho khách hàng về sản phẩm bảo hiểm của công ty MB Ageas Life tại Việt Nam.
+Hãy trả lời chuyên nghiệp, chính xác, cung cấp thông tin trước rồi hỏi câu tiếp theo. Tất cả các thông tin cung cấp đều trong phạm vi MBAL. Khi có đủ thông tin khách hàng thì mới mời khách hàng đăng ký để nhận tư vấn trên https://www.mbageas.life/
+{context_with_memory} \nCâu hỏi: {query} \nTrả lời:"""}
+    ]
+
+    # Get the response from the client
+    response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
+
+    # Process the response content
+    response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
+
+    # If the response is empty or very short, or if no relevant documents were found, use the LLM's default knowledge
+    if not context or len(response.split()) < 35 or not retrieved_docs:
+        messages = [{"role": "user", "content": query}]
+        response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
+        response = response_content.choices[0].message.content
+
+    # Append the response to memory
+    memory.chat_memory.add_ai_message(response)
+
+    return response
+
+def process_feedback(query, response, feedback):
+    # st.write(f"Feedback received: {'👍' if feedback else '👎'} for query: {query}")
+    if feedback:
+        # If thumbs up, store the response in memory buffer
+        memory.chat_memory.add_ai_message(response)
+    else:
+        # If thumbs down, remove the response from memory buffer and regenerate the response
+        # memory.chat_memory.messages = [msg for msg in memory.chat_memory.messages if msg.get("content") != response]
+        new_query = f"{query}. Give better response"
+        new_response = rag_query(new_query)
+        st.markdown(new_response)
+        memory.chat_memory.add_ai_message(new_response)
+
+# Streamlit interface
+
+st.title("Welcome to our RAG-Based Chatbot")
+st.markdown("***")
+st.info('''
+To use Our Mistral supported Chatbot, click Chat.
+
+To push data, click on Store Document.
+''')
+
+col1, col2 = st.columns(2)
+
+with col1:
+    chat = st.button("Chat")
+    if chat:
+        st.switch_page("pages/chatbot.py")
+
+
 st.markdown("<div style='text-align:center;'></div>", unsafe_allow_html=True)
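
The Chat button switches to pages/chatbot.py, which is not part of this diff. Purely as an illustration of how such a page could drive the rag_query() pipeline defined above, a minimal sketch using Streamlit's chat primitives might look like the following. The file layout, the shared rag.py module, and the prompt placeholder are assumptions, not the Space's actual code; in particular it assumes rag_query() has been moved out of app.py so importing it does not re-run the landing page's setup.

```python
# pages/chatbot.py -- hypothetical sketch, not the file from this Space
import streamlit as st
from rag import rag_query  # assumption: rag_query refactored into a shared rag.py module

st.title("MBAL Chatbot")

# Keep the visible transcript in session state so it survives Streamlit reruns
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation so far
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# Read the next user turn, answer it with the RAG pipeline, and record both sides
if prompt := st.chat_input("Ask about MB Ageas Life insurance products"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    answer = rag_query(prompt)
    st.session_state.messages.append({"role": "assistant", "content": answer})
    with st.chat_message("assistant"):
        st.markdown(answer)
```

Keeping the transcript in st.session_state is what makes a page like this usable under Streamlit's rerun-per-interaction model, and it complements the server-side ConversationBufferMemory that rag_query() already maintains.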