ManTea committed
Commit 9675d25 · 1 Parent(s): 52817c5

update history chat

Files changed (2):
1. NLP_model/chatbot.py +65 -149
2. mongodb.py +71 -0
NLP_model/chatbot.py CHANGED
@@ -6,26 +6,14 @@ from langchain_community.vectorstores import FAISS
  from langchain.chains import RetrievalQA, ConversationalRetrievalChain
  from langchain_google_genai import ChatGoogleGenerativeAI
  from langchain.prompts import PromptTemplate
+ from langchain_ollama import OllamaLLM
  from pinecone import Pinecone, ServerlessSpec
  from langchain_pinecone import PineconeVectorStore
  from dotenv import load_dotenv
  import threading
  from datetime import datetime
- import time
  from langchain.schema import HumanMessage, AIMessage
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
- import functools
- import hashlib
- import logging
- import random
-
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
- )
- logger = logging.getLogger(__name__)
-
  # Load environment variables
  load_dotenv()

@@ -41,38 +29,38 @@ os.environ["PINECONE_API_KEY"] = pinecone_api_key

  genai.configure(api_key=google_api_key)

- # Get the chatbot model
+ # get the chatbot model
  model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-8b-latest",
                                 temperature=0.8)
+ # model = OllamaLLM(model="llama2")
+ # print("Llama2 loaded successfully!")

- # Get the embedding model
+ # get the embedding model
  embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

+
  # Per-user conversation history store (kept as strings)
  user_histories = {}
  history_lock = threading.Lock()

- # Cache for responses
- response_cache = {}
- cache_lock = threading.Lock()
- # Maximum cache size and time-to-live (30 minutes)
- MAX_CACHE_SIZE = 100
- CACHE_TTL = 1800  # 30 minutes in seconds
-
  # Create a prompt template with conversation history
  prompt = PromptTemplate(
      template = """Goal:
  You are a professional tour guide assistant that assists users in finding information about places in Da Nang, Vietnam.
  You can provide details on restaurants, cafes, hotels, attractions, and other local venues. You have to chat with users, who are Da Nang tourists.

+
  Return Format:
- Respond in friendly, natural, and concise English like a real tour guide.
+ - Respond in clear, natural, and concise English.
+ - If you do not have enough information to answer the user's question, reply with "I don't know", and explain that you are not sure about the information.
+ - When sufficient information is available in the Context, provide a specific and informative answer.
+ - Support users like a real tour guide, not a bot. The information in the Context is your own knowledge.
+ - You only care about the time the user mentions when they ask about a Solana event.

  Warning:
- Support users like a real tour guide, not a bot. The information in the Context is your own knowledge.
- Your knowledge is provided in the Context. All of the information in the Context is about Da Nang, Vietnam.
- You only care about the current time the user mentions when they ask about a Solana event.
- If you do not have enough information to answer the user's question, reply with "I don't know. I don't have information about that".
+ - Your knowledge is provided in the Context. All of the information in the Context is about Da Nang, Vietnam.
+ - Do not fabricate or guess information.
+ - Answer with "I don't know" if you don't have enough information.

  Context:
  {context}

@@ -103,8 +91,8 @@ def update_history(user_id, new_entry):
      # Store only the last 30 interactions by keeping the 60 most recent lines
      # (assuming 2 lines per interaction: 1 for user, 1 for bot)
      history_lines = current_history.split('\n')
-     if len(history_lines) > 20:
-         history_lines = history_lines[-20:]
+     if len(history_lines) > 60:
+         history_lines = history_lines[-60:]
      current_history = '\n'.join(history_lines)

      updated_history = current_history + new_entry + "\n"

@@ -137,145 +125,73 @@ def string_to_message_history(history_str):

      return messages

- # Singleton pattern so the retriever is initialized only once
- _retriever_instance = None
- _retriever_lock = threading.Lock()
-
  def get_chain():
-     """Get the retrieval chain with Pinecone vector store (singleton pattern)"""
-     global _retriever_instance
-
-     # If an instance already exists, return it immediately
-     if _retriever_instance is not None:
-         return _retriever_instance
-
-     # Thread-safe initialization
-     with _retriever_lock:
-         # Check again in case another thread has already initialized it
-         if _retriever_instance is not None:
-             return _retriever_instance
-
-         try:
-             start_time = time.time()
-             pc = Pinecone(
-                 api_key=os.environ["PINECONE_API_KEY"]
-             )
-
-             # Get the vector store from the existing index
-             vectorstore = PineconeVectorStore.from_existing_index(
-                 index_name="testbot768",
-                 embedding=embeddings,
-                 text_key="text"
-             )
-
-             _retriever_instance = vectorstore.as_retriever(search_kwargs={"k": 3})
-             logger.info(f"Pinecone retriever initialized in {time.time() - start_time:.2f} seconds")
-             return _retriever_instance
-         except Exception as e:
-             logger.error(f"Error getting vector store from Pinecone: {e}")
-             # Fall back to a local vector store or return None
-             try:
-                 # Try to load a local FAISS index if it exists
-                 start_time = time.time()
-                 vectorstore = FAISS.load_local("faiss_index", embeddings)
-                 _retriever_instance = vectorstore.as_retriever(search_kwargs={"k": 3})
-                 logger.info(f"FAISS retriever initialized in {time.time() - start_time:.2f} seconds")
-                 return _retriever_instance
-             except Exception as faiss_error:
-                 logger.error(f"Error getting FAISS vector store: {faiss_error}")
-                 return None
+     """Get the retrieval chain with Pinecone vector store"""
+     try:
+         pc = Pinecone(
+             api_key=os.environ["PINECONE_API_KEY"]
+         )
+
+         # Get the vector store from the existing index
+         vectorstore = PineconeVectorStore.from_existing_index(
+             index_name="testbot768",
+             embedding=embeddings,
+             text_key="text"
+         )
+
+         retrieve = vectorstore.as_retriever(search_kwargs={"k": 3})
+
+         return retrieve
+     except Exception as e:
+         print(f"Error getting vector store: {e}")
+         return None
-
- def clean_cache():
-     """Clean expired cache entries"""
-     with cache_lock:
-         current_time = time.time()
-         expired_keys = [k for k, v in response_cache.items() if current_time - v['timestamp'] > CACHE_TTL]
-
-         for key in expired_keys:
-             del response_cache[key]
-
-         # If the cache is still too large, remove the oldest entries
-         if len(response_cache) > MAX_CACHE_SIZE:
-             # Sort by timestamp and keep the MAX_CACHE_SIZE most recent entries
-             sorted_items = sorted(response_cache.items(), key=lambda x: x[1]['timestamp'])
-             items_to_remove = sorted_items[:len(sorted_items) - MAX_CACHE_SIZE]
-
-             for key, _ in items_to_remove:
-                 del response_cache[key]
-
- def generate_cache_key(request, user_id):
-     """Generate a unique cache key from the request and user_id"""
-     # Build a combined string to hash
-     combined = f"{request.strip().lower()}:{user_id}"
-     # Create an MD5 hash
-     return hashlib.md5(combined.encode()).hexdigest()

  def chat(request, user_id="default_user"):
      """Process a chat request from a specific user"""
-     start_time = time.time()
-
-     # Periodically remove expired cache entries
-     if random.random() < 0.1:  # 10% chance per call
-         clean_cache()
-
-     # Build the cache key
-     cache_key = generate_cache_key(request, user_id)
-
-     # Check the cache
-     with cache_lock:
-         if cache_key in response_cache:
-             cache_data = response_cache[cache_key]
-             # Check the time-to-live
-             if time.time() - cache_data['timestamp'] <= CACHE_TTL:
-                 logger.info(f"Cache hit for user {user_id}, request: '{request[:30]}...'")
-                 # Refresh the timestamp to reset the TTL
-                 cache_data['timestamp'] = time.time()
-                 # Still update the conversation history
-                 new_entry = f"User: {request}\nBot: {cache_data['response']}"
-                 update_history(user_id, new_entry)
-                 return cache_data['response']
      try:
+         # Get retrieval chain
          retriever = get_chain()
          if not retriever:
              return "Error: Could not initialize retriever"

+         # Get current conversation history as string
+         conversation_history_str = get_history(user_id)
+
+         # Convert string history to LangChain message format
+         message_history = string_to_message_history(conversation_history_str)
+
+         # Get current time
          current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

-         retrieved_docs = retriever.get_relevant_documents(request)
-         context = "\n".join([doc.page_content for doc in retrieved_docs])
-         # context = context + "\n(Current time: " + current_time + ")"
-         # print("Context:", context)
+         # Add timestamp to question
+         question_with_time = f"{request}\n(Current time: {current_time})"
+         # print("User question:", question_with_time)

-         # print(prompt.format(
-         #     context=context,
-         #     question=request,
-         #     chat_history=get_history(user_id)
-         # ))
-         response = model.invoke(
-             prompt.format(
-                 context=context,
-                 question=request,
-                 chat_history=get_history(user_id)
-             )
-         )
-         answer = str(response.content)
+         # Create a ConversationalRetrievalChain
+         # Get relevant documents from retriever
+         retrieved_docs = retriever.get_relevant_documents(question_with_time)
+         print("Retrieved documents page content:", [doc.page_content for doc in retrieved_docs])
+
+         conversation_chain = ConversationalRetrievalChain.from_llm(
+             llm=model,
+             retriever=retriever,
+             combine_docs_chain_kwargs={"prompt": prompt}
+         )

-         new_entry = f"User: {request}\nBot: {answer}"
-         update_history(user_id, new_entry)
-         # print(get_history(user_id))
+         # Call the chain with the question and the converted message history
+         response = conversation_chain({"question": question_with_time, "chat_history": message_history})
+         answer = str(response['answer'])

-         # Save to cache
-         with cache_lock:
-             response_cache[cache_key] = {
-                 'response': answer,
-                 'timestamp': time.time()
-             }
+         # Update conversation history string
+         new_entry = f"User: {question_with_time}\nBot: {answer}"
+         update_history(user_id, new_entry)
+         print(get_history(user_id))

-         logger.info(f"Total processing time: {time.time() - start_time:.2f} seconds")
+         print(answer)
          return answer
      except Exception as e:
-         logger.error(f"Error in chat: {e}")
-         return f"I don't know how to answer that right now. Let me forward this to the admin team."
+         print(f"Error in chat: {e}")
+         return f"I encountered an error: {str(e)}"

  def clear_memory(user_id="default_user"):
      """Clear the conversation history for a specific user"""
@@ -283,4 +199,4 @@ def clear_memory(user_id="default_user"):
      if user_id in user_histories:
          del user_histories[user_id]
          return f"Conversation history cleared for user {user_id}"
-     return f"No conversation history found for user {user_id}"
+     return f"No conversation history found for user {user_id}"
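For reference, a minimal, hypothetical sketch of how the reworked chat() and clear_memory() entry points are driven; the import path follows the repo layout above, and the user_id value is made up for illustration:

    # Hypothetical usage sketch (user_id is illustrative, not part of this commit)
    from NLP_model.chatbot import chat, clear_memory

    # First question creates a fresh history string for this user
    print(chat("What are some good seafood restaurants in Da Nang?", user_id="tourist_42"))

    # A follow-up reuses the stored history, so the chain sees the prior turn
    print(chat("Which of those is closest to My Khe beach?", user_id="tourist_42"))

    # Drop the stored history for this user
    print(clear_memory(user_id="tourist_42"))

Because update_history() now keeps the 60 most recent lines instead of 20, a session like this retains roughly 30 user/bot exchanges before trimming.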
mongodb.py ADDED
@@ -0,0 +1,71 @@
+ import os
+ from pymongo import MongoClient
+ import logging
+ from dotenv import load_dotenv
+
+ # Load environment variables from .env (if present)
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Read the MongoDB connection settings from environment variables
+ MONGODB_URI = os.getenv("MONGODB_URI")
+ MONGODB_DB = os.getenv("MONGODB_DB")
+ MONGODB_COLLECTION = os.getenv("MONGODB_COLLECTION")
+
+ # Connect to MongoDB using pymongo
+ client = MongoClient(MONGODB_URI)
+ db = client[MONGODB_DB]
+ collection = db[MONGODB_COLLECTION]
+
+
+ def get_chat_history(user_id: int) -> str:
+     """
+     Fetch the chat history for the given user_id from MongoDB and join it into a string of the form:
+
+     Bot: ...
+     User: ...
+     Bot: ...
+     ...
+
+     Assumptions:
+     - Each document has a "user_id" field used for filtering.
+     - The "factor" field identifies the sender (factor == "user" means a User message;
+       anything else is treated as a Bot/RAG message).
+     - The "timestamp" field is used for chronological ordering (when present).
+     """
+     try:
+         # Query all documents for this user_id, sorted by ascending timestamp
+         # (if there is no timestamp field, sorting by _id also works)
+         docs = list(collection.find({"user_id": user_id}).sort("timestamp", 1).limit(15))
+         if not docs:
+             logger.info(f"No data found for user_id: {user_id}")
+             return ""
+
+         conversation_lines = []
+         for doc in docs:
+             factor = doc.get("factor", "").lower()
+             action = doc.get("action", "").lower()
+             message = doc.get("message", "")
+
+             if action == "freely asking":
+                 conversation_lines.append(f"User: {message}")
+             elif action == "response":
+                 conversation_lines.append(f"Bot: {message}")
+
+         # Join the lines into a single newline-separated string
+         return "\n".join(conversation_lines)
+     except Exception as e:
+         logger.error(f"Error fetching chat history for user_id {user_id}: {e}")
+         return ""
+
+ # if __name__ == '__main__':
+ #     user_id = int(input("Enter the user_id whose chat history to fetch: ").strip())
+ #     history = get_chat_history(user_id)
+ #     if history:
+ #         print("\nConversation history:")
+ #         print(history)
+ #     else:
+ #         print(f"No chat history found for user_id: {user_id}")
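A small usage sketch for the new helper, assuming MONGODB_URI, MONGODB_DB, and MONGODB_COLLECTION are set in .env; the user_id value is illustrative:

    # Hypothetical usage sketch for mongodb.get_chat_history
    from mongodb import get_chat_history

    history = get_chat_history(12345)  # illustrative user_id
    if history:
        # The returned string alternates "User: ..." and "Bot: ..." lines,
        # capped at the 15 oldest matching documents by the query's limit(15)
        print(history)
    else:
        print("No chat history found for user_id 12345")

Note the design split: documents are classified by the "action" field ("freely asking" vs "response"), while "factor" is read but not used for classification in this version.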