ngcanh committed
Commit 3c7d3dd · verified · 1 Parent(s): ada8cbb

Update app.py

Files changed (1):
  1. app.py +149 -133
app.py CHANGED
@@ -1,134 +1,150 @@
- __import__('pysqlite3')
- import sys
- sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-
- # DATABASES = {
- #     'default': {
- #         'ENGINE': 'django.db.backends.sqlite3',
- #         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
- #     }
- # }
- import streamlit as st
- from huggingface_hub import InferenceClient
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
- from llama_index.vector_stores.chroma import ChromaVectorStore
- from llama_index.core import StorageContext
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.vectorstores import Chroma
- import chromadb
- from langchain.memory import ConversationBufferMemory
-
-
-
- # Set page config
- st.set_page_config(page_title="RAG Chatbot", page_icon="🤖", layout="wide")
-
- # Set your Hugging Face token here
-
- HF_TOKEN = st.secrets["HF_TOKEN"]
-
- # Initialize your models, databases, and other components here
- @st.cache_resource
- def init_chroma():
-     persist_directory = "chroma_db"
-     chroma_client = chromadb.PersistentClient(path=persist_directory)
-     chroma_collection = chroma_client.get_or_create_collection("my_collection")
-     return chroma_client, chroma_collection
-
- @st.cache_resource
- def init_vectorstore():
-     persist_directory = "chroma_db"
-     embeddings = HuggingFaceEmbeddings()
-     vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
-     return vectorstore
-
- # Initialize components
- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
- chroma_client, chroma_collection = init_chroma()
- vectorstore = init_vectorstore()
-
- # Initialize memory buffer
- memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-
- def rag_query(query):
-     # Retrieve relevant documents using similarity search
-     retrieved_docs = vectorstore.similarity_search(query, k=3)
-
-     # Prepare context for LLaMA
-     if retrieved_docs:
-         context = "\n".join([doc.page_content for doc in retrieved_docs])
-     else:
-         context = ""
-
-     # Append new interaction to memory
-     memory.chat_memory.add_user_message(query)
-
-     # Retrieve past interactions for context
-     past_interactions = memory.load_memory_variables({})[memory.memory_key]
-     context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
-
-     # Debugging: Display context and past interactions
-     # st.write("Debugging Info:")
-     # st.write("Context Sent to Model:", context_with_memory)
-     # st.write("Retrieved Documents:", [doc.page_content for doc in retrieved_docs])
-     # st.write("Past Interactions:", past_interactions)
-
-     # Generate response using LLaMA
-     messages = [
-         {"role": "user", "content": f"Context: {context_with_memory}\n\nQuestion: {query},it is not mandatory to use the context\n\nAnswer:"}
-     ]
-
-     # Get the response from the client
-     response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
-
-     # Process the response content
-     response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
-
-     # If the response is empty or very short, or if no relevant documents were found, use the LLM's default knowledge
-     if not context or len(response.split()) < 35 or not retrieved_docs:
-         messages = [{"role": "user", "content": query}]
-         response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
-         response = response_content.choices[0].message.content
-
-     # Append the response to memory
-     memory.chat_memory.add_ai_message(response)
-
-     return response
-
- def process_feedback(query, response, feedback):
-     # st.write(f"Feedback received: {'👍' if feedback else '👎'} for query: {query}")
-     if feedback:
-         # If thumbs up, store the response in memory buffer
-         memory.chat_memory.add_ai_message(response)
-     else:
-         # If thumbs down, remove the response from memory buffer and regenerate the response
-         # memory.chat_memory.messages = [msg for msg in memory.chat_memory.messages if msg.get("content") != response]
-         new_query=f"{query}. Give better response"
-         new_response = rag_query(new_query)
-         st.markdown(new_response)
-         memory.chat_memory.add_ai_message(new_response)
-
- # Streamlit interface
-
- st.title("Welcome to our RAG-Based Chatbot")
- st.markdown("***")
- st.info('''
- To use Our Mistral supported Chatbot, click Chat.
-
- To push data, click on Store Document.
- ''')
-
- col1, col2 = st.columns(2)
-
- with col1:
-     chat = st.button("Chat")
-     if chat:
-         st.switch_page("pages/chatbot.py")
-
- with col2:
-     rag = st.button("Store Document")
-     if rag:
-         st.switch_page("pages/management.py")
-
+ __import__('pysqlite3')
+ import sys
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
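+ # Note: this swap points the stdlib name at the bundled pysqlite3 wheel;
+ # chromadb needs sqlite3 >= 3.35, which many hosted runtimes still lack.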
+
+ # DATABASES = {
+ #     'default': {
+ #         'ENGINE': 'django.db.backends.sqlite3',
+ #         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+ #     }
+ # }
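+ # The commented-out DATABASES block above is leftover Django configuration;
+ # nothing in this Streamlit app reads it.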
+ import streamlit as st
+ from huggingface_hub import InferenceClient
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
+ from llama_index.vector_stores.chroma import ChromaVectorStore
+ from llama_index.core import StorageContext
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ import chromadb
+ from langchain.memory import ConversationBufferMemory
+ import pandas as pd
+ from langchain.schema import Document
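+ # Note: the llama_index imports and CharacterTextSplitter are never referenced
+ # in this file; retrieval goes through LangChain's Chroma wrapper below.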
+
+
+ # Set page config
+ st.set_page_config(page_title="MBAL Chatbot", page_icon="🛡️", layout="wide")
+
+ # Set your Hugging Face token here
+
+ HF_TOKEN = st.secrets["HF_TOKEN"]
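+ # st.secrets is read from .streamlit/secrets.toml (or the Space's Secrets
+ # settings), so the app fails at startup if HF_TOKEN is not configured.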
+
+ @st.cache_resource
+ def init_chroma():
+     persist_directory = "chroma_db"
+     chroma_client = chromadb.PersistentClient(path=persist_directory)
+     chroma_collection = chroma_client.get_or_create_collection("my_collection")
+     return chroma_client, chroma_collection
+
+ @st.cache_resource
+ def init_vectorstore():
+     persist_directory = "chroma_db"
+     embeddings = HuggingFaceEmbeddings()
+     vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
+     return vectorstore
+ @st.cache_resource
+ def setup_vector():
+     # Read the data from the Excel file
+     df = pd.read_excel("chunk_metadata_template.xlsx")
+     chunks = []
+
+     # Build a list of Documents with metadata
+     for _, row in df.iterrows():
+         chunk_with_metadata = Document(
+             page_content=row['page_content'],
+             metadata={
+                 'chunk_id': row['chunk_id'],
+                 'document_title': row['document_title'],
+                 'topic': row['topic'],
+                 'access': row['access']
+             }
+         )
+         chunks.append(chunk_with_metadata)
+
+     # Initialize the embedding model
+     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
+
+     # Create the vectorstore, or write into an existing one
+     persist_directory = "chroma_db"
+     collection_name = "my_collection"
+
+     # Build the vectorstore from the data and write it to Chroma
+     vectorstore = Chroma.from_documents(
+         documents=chunks,
+         embedding=embeddings,
+         persist_directory=persist_directory,
+         collection_name=collection_name
+     )
+
+     # Persist to disk so the data is retained
+     vectorstore.persist()
+
+     return vectorstore
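+ # Caveat: Chroma.from_documents adds the Excel rows to the persisted
+ # collection on every cold start (st.cache_resource only caches per process),
+ # so restarts can accumulate duplicate entries. A possible fix, sketched here
+ # as an assumption rather than part of this commit, is to pass stable ids
+ # derived from chunk_id so re-runs reuse the same entries:
+ #
+ #     vectorstore = Chroma.from_documents(
+ #         documents=chunks,
+ #         embedding=embeddings,
+ #         persist_directory=persist_directory,
+ #         collection_name=collection_name,
+ #         ids=[str(c.metadata['chunk_id']) for c in chunks],
+ #     )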
+
+ # Initialize components
+ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
+ chroma_client, chroma_collection = init_chroma()
+ init_vectorstore()
+ vectorstore = setup_vector()
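+ # init_vectorstore()'s return value is discarded here; the store actually
+ # queried is the one returned by setup_vector().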
+
+ # Initialize memory buffer
+ memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+ def rag_query(query):
+     # Retrieve the relevant documents
+     retrieved_docs = vectorstore.similarity_search(query, k=5)
+     context = "\n".join([doc.page_content for doc in retrieved_docs]) if retrieved_docs else ""
+
+     # Load past interactions
+     past_interactions = memory.load_memory_variables({})[memory.memory_key]
+     context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
+
+     # Prepare the prompt
+     messages = [
+         {
+             "role": "user",
+             "content": f"""You are a consultant advising clients on insurance products from MB Ageas Life in Vietnam. Please respond professionally and accurately, and suggest suitable products by asking a few questions about the customer's needs. All information provided must remain within the scope of MBAL. Invite the customer to register for a more detailed consultation at https://www.mbageas.life/
+ {context_with_memory}
+ Question: {query}
+ Answer:"""
+         }
+     ]
+
+     response_content = client.chat_completion(messages=messages, max_tokens=1024, stream=False)
+     response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
+     return response
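+ # Unlike the previous version, rag_query no longer writes to the conversation
+ # memory, so chat_history only grows when process_feedback appends a message.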
+
+
+ def process_feedback(query, response, feedback):
+     # st.write(f"Feedback received: {'👍' if feedback else '👎'} for query: {query}")
+     if feedback:
+         # If thumbs up, store the response in the memory buffer
+         memory.chat_memory.add_ai_message(response)
+     else:
+         # If thumbs down, remove the response from the memory buffer and regenerate the response
+         # memory.chat_memory.messages = [msg for msg in memory.chat_memory.messages if msg.get("content") != response]
+         new_query = f"{query}. Tạo câu trả lời đúng với câu hỏi"  # i.e. "Generate an answer that matches the question"
+         new_response = rag_query(new_query)
+         st.markdown(new_response)
+         memory.chat_memory.add_ai_message(new_response)
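+ # Note: the removal step above is still commented out, so a rejected response
+ # is never deleted from memory; the regenerated answer is simply appended.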
+
+ # Streamlit interface
+
+ st.title("Chào mừng bạn đã đến với MBAL Chatbot")  # "Welcome to the MBAL Chatbot"
+ st.markdown("***")
+ st.info('''
+ Tôi sẽ giải đáp các thắc mắc của bạn liên quan đến các sản phẩm bảo hiểm nhân thọ của MB Ageas Life''')
+ # The st.info text reads: "I will answer your questions about MB Ageas Life's life insurance products."
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+     chat = st.button("Chat")
+     if chat:
+         st.switch_page("pages/chatbot.py")
+
+ with col2:
+     rag = st.button("Store Document")
+     if rag:
+         st.switch_page("pages/management.py")
+
  st.markdown("<div style='text-align:center;'></div>", unsafe_allow_html=True)