ngcanh committed on
Commit 64806bd · verified · 1 Parent(s): 9e7d644

Update app.py

Files changed (1)
  1. app.py +169 -133
app.py CHANGED
@@ -1,134 +1,170 @@
- __import__('pysqlite3')
- import sys
- sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-
- # DATABASES = {
- #     'default': {
- #         'ENGINE': 'django.db.backends.sqlite3',
- #         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
- #     }
- # }
- import streamlit as st
- from huggingface_hub import InferenceClient
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
- from llama_index.vector_stores.chroma import ChromaVectorStore
- from llama_index.core import StorageContext
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.vectorstores import Chroma
- import chromadb
- from langchain.memory import ConversationBufferMemory
-
-
-
- # Set page config
- st.set_page_config(page_title="RAG Chatbot", page_icon="🤖", layout="wide")
-
- # Set your Hugging Face token here
-
- HF_TOKEN = st.secrets["HF_TOKEN"]
-
- # Initialize your models, databases, and other components here
- @st.cache_resource
- def init_chroma():
-     persist_directory = "chroma_db"
-     chroma_client = chromadb.PersistentClient(path=persist_directory)
-     chroma_collection = chroma_client.get_or_create_collection("my_collection")
-     return chroma_client, chroma_collection
-
- @st.cache_resource
- def init_vectorstore():
-     persist_directory = "chroma_db"
-     embeddings = HuggingFaceEmbeddings()
-     vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
-     return vectorstore
-
- # Initialize components
- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
- chroma_client, chroma_collection = init_chroma()
- vectorstore = init_vectorstore()
-
- # Initialize memory buffer
- memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-
- def rag_query(query):
-     # Retrieve relevant documents using similarity search
-     retrieved_docs = vectorstore.similarity_search(query, k=3)
-
-     # Prepare context for LLaMA
-     if retrieved_docs:
-         context = "\n".join([doc.page_content for doc in retrieved_docs])
-     else:
-         context = ""
-
-     # Append new interaction to memory
-     memory.chat_memory.add_user_message(query)
-
-     # Retrieve past interactions for context
-     past_interactions = memory.load_memory_variables({})[memory.memory_key]
-     context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
-
-     # Debugging: Display context and past interactions
-     # st.write("Debugging Info:")
-     # st.write("Context Sent to Model:", context_with_memory)
-     # st.write("Retrieved Documents:", [doc.page_content for doc in retrieved_docs])
-     # st.write("Past Interactions:", past_interactions)
-
-     # Generate response using LLaMA
-     messages = [
-         {"role": "user", "content": f"Context: {context_with_memory}\n\nQuestion: {query},it is not mandatory to use the context\n\nAnswer:"}
-     ]
-
-     # Get the response from the client
-     response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
-
-     # Process the response content
-     response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
-
-     # If the response is empty or very short, or if no relevant documents were found, use the LLM's default knowledge
-     if not context or len(response.split()) < 35 or not retrieved_docs:
-         messages = [{"role": "user", "content": query}]
-         response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
-         response = response_content.choices[0].message.content
-
-     # Append the response to memory
-     memory.chat_memory.add_ai_message(response)
-
-     return response
-
- def process_feedback(query, response, feedback):
-     # st.write(f"Feedback received: {'👍' if feedback else '👎'} for query: {query}")
-     if feedback:
-         # If thumbs up, store the response in memory buffer
-         memory.chat_memory.add_ai_message(response)
-     else:
-         # If thumbs down, remove the response from memory buffer and regenerate the response
-         # memory.chat_memory.messages = [msg for msg in memory.chat_memory.messages if msg.get("content") != response]
-         new_query=f"{query}. Give better response"
-         new_response = rag_query(new_query)
-         st.markdown(new_response)
-         memory.chat_memory.add_ai_message(new_response)
-
- # Streamlit interface
-
- st.title("Welcome to our RAG-Based Chatbot")
- st.markdown("***")
- st.info('''
- To use Our Mistral supported Chatbot, click Chat.
-
- To push data, click on Store Document.
- ''')
-
- col1, col2 = st.columns(2)
-
- with col1:
-     chat = st.button("Chat")
-     if chat:
-         st.switch_page("pages/chatbot.py")
-
- with col2:
-     rag = st.button("Store Document")
-     if rag:
-         st.switch_page("pages/management.py")
-
+ __import__('pysqlite3')
+ import sys
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+ # DATABASES = {
+ #     'default': {
+ #         'ENGINE': 'django.db.backends.sqlite3',
+ #         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+ #     }
+ # }
+ import streamlit as st
+ import pandas as pd  # used by setup_vector() to read the chunk spreadsheet
+ from huggingface_hub import InferenceClient
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
+ from llama_index.vector_stores.chroma import ChromaVectorStore
+ from llama_index.core import StorageContext
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ from langchain.docstore.document import Document  # Document objects built in setup_vector()
+ import chromadb
+ from langchain.memory import ConversationBufferMemory
+
+
+
+ # Set page config
+ st.set_page_config(page_title="MBAL Chatbot", page_icon="🛡️", layout="wide")
+
+ # Set your Hugging Face token here
+
+ HF_TOKEN = st.secrets["HF_TOKEN"]
+
+ # Initialize your models, databases, and other components here
+ # @st.cache_resource
+ # def init_chroma():
+ #     persist_directory = "chroma_db"
+ #     chroma_client = chromadb.PersistentClient(path=persist_directory)
+ #     chroma_collection = chroma_client.get_or_create_collection("my_collection")
+ #     return chroma_client, chroma_collection
+
+ # @st.cache_resource
+ # def init_vectorstore():
+ #     persist_directory = "chroma_db"
+ #     embeddings = HuggingFaceEmbeddings()
+ #     vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
+ #     return vectorstore
+ @st.cache_resource
+ def setup_vector():
+     # Read the chunk data from the Excel file
+     df = pd.read_excel("chunk_metadata_template.xlsx")
+     chunks = []
+
+     # Build a list of Documents carrying metadata
+     for _, row in df.iterrows():
+         chunk_with_metadata = Document(
+             page_content=row['page_content'],
+             metadata={
+                 'chunk_id': row['chunk_id'],
+                 'document_title': row['document_title'],
+                 'topic': row['topic'],
+                 'access': row['access']
+             }
+         )
+         chunks.append(chunk_with_metadata)
+
+     # Initialize the embedding model
+     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
+
+     # Create, or write into, the existing vectorstore
+     persist_directory = "chroma_db"
+     collection_name = "my_collection"
+
+     # Build the vectorstore from the documents and write it to Chroma
+     vectorstore = Chroma.from_documents(
+         documents=chunks,
+         embedding=embeddings,
+         persist_directory=persist_directory,
+         collection_name=collection_name
+     )
+
+     # Persist to disk to make sure the data is saved
+     vectorstore.persist()
+
+     return vectorstore
+
+ # Initialize components
+ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
+ vectorstore = setup_vector()  # init_chroma()/init_vectorstore() are commented out above, so build the store here
+
+ # Initialize memory buffer
+ memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+ def rag_query(query):
+     # Retrieve relevant documents using similarity search
+     retrieved_docs = vectorstore.similarity_search(query, k=3)
+
+     # Prepare context for Mistral
+     if retrieved_docs:
+         context = "\n".join([doc.page_content for doc in retrieved_docs])
+     else:
+         context = ""
+
+     # Append new interaction to memory
+     memory.chat_memory.add_user_message(query)
+
+     # Retrieve past interactions for context
+     past_interactions = memory.load_memory_variables({})[memory.memory_key]
+     context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
+
+     # Debugging: Display context and past interactions
+     # st.write("Debugging Info:")
+     # st.write("Context Sent to Model:", context_with_memory)
+     # st.write("Retrieved Documents:", [doc.page_content for doc in retrieved_docs])
+     # st.write("Past Interactions:", past_interactions)
+
+     # Generate response using Mistral
+     messages = [
+         {"role": "user", "content": f"""Bạn là một chuyên viên tư vấn cho khách hàng về sản phẩm bảo hiểm của công ty MB Ageas Life tại Việt Nam.
+ Hãy trả lời chuyên nghiệp, chính xác, cung cấp thông tin trước rồi hỏi câu tiếp theo. Tất cả các thông tin cung cấp đều trong phạm vi MBAL. Khi có đủ thông tin khách hàng thì mới mời khách hàng đăng ký để nhận tư vấn trên https://www.mbageas.life/
+ {context_with_memory}\nCâu hỏi: {query}\nTrả lời:"""}
+     ]
+
+     # Get the response from the client
+     response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
+
+     # Process the response content (strip an echoed "Trả lời:" cue if the model repeats it)
+     response = response_content.choices[0].message.content.split("Trả lời:")[-1].strip()
+
+     # If the response is empty or very short, or if no relevant documents were found, use the LLM's default knowledge
+     if not context or len(response.split()) < 35 or not retrieved_docs:
+         messages = [{"role": "user", "content": query}]
+         response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
+         response = response_content.choices[0].message.content
+
+     # Append the response to memory
+     memory.chat_memory.add_ai_message(response)
+
+     return response
+
+ def process_feedback(query, response, feedback):
+     # st.write(f"Feedback received: {'👍' if feedback else '👎'} for query: {query}")
+     if feedback:
+         # If thumbs up, store the response in memory buffer
+         memory.chat_memory.add_ai_message(response)
+     else:
+         # If thumbs down, regenerate the response and store the new one
+         # memory.chat_memory.messages = [msg for msg in memory.chat_memory.messages if msg.get("content") != response]
+         new_query = f"{query}. Give better response"
+         new_response = rag_query(new_query)
+         st.markdown(new_response)
+         memory.chat_memory.add_ai_message(new_response)
+
+
+ # Streamlit interface
+
+ st.title("Welcome to our RAG-Based Chatbot")
+ st.markdown("***")
+ st.info('''
+ To use our Mistral-supported chatbot, click Chat.
+
+ To push data, click on Store Document.
+ ''')
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+     chat = st.button("Chat")
+     if chat:
+         st.switch_page("pages/chatbot.py")
+
+
  st.markdown("<div style='text-align:center;'></div>", unsafe_allow_html=True)
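
A minimal sketch (not part of the commit) of how the Chroma collection persisted by setup_vector() could be reopened and queried with a metadata filter, assuming the same "chroma_db" directory and "my_collection" name used above; the query string and the "public" access value are hypothetical examples.

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Reopen the collection that setup_vector() persisted to disk.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = Chroma(
    persist_directory="chroma_db",
    collection_name="my_collection",
    embedding_function=embeddings,
)

# similarity_search accepts a metadata filter, so retrieval can be restricted
# by the fields attached to each chunk (chunk_id, document_title, topic, access).
# The "public" value and the query string are hypothetical examples.
docs = store.similarity_search("quyền lợi bảo hiểm", k=3, filter={"access": "public"})
for doc in docs:
    print(doc.metadata.get("document_title"), "-", doc.page_content[:80])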