# Swap in pysqlite3 so Chroma gets a modern SQLite on hosts (e.g. Streamlit
# Cloud) whose bundled sqlite3 is too old.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import streamlit as st
import pandas as pd
import chromadb
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document

# Set page config
st.set_page_config(page_title="MBAL Chatbot", page_icon="🛡️", layout="wide")

# The Hugging Face token is read from Streamlit secrets
HF_TOKEN = st.secrets["HF_TOKEN"]

EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'


@st.cache_resource
def init_chroma():
    persist_directory = "chroma_db"
    chroma_client = chromadb.PersistentClient(path=persist_directory)
    chroma_collection = chroma_client.get_or_create_collection("my_collection")
    return chroma_client, chroma_collection


@st.cache_resource
def init_vectorstore():
    # Open the existing collection read-only (e.g. for pages that only query).
    # Use the same embedding model as setup_vector(); querying with a
    # different model than the one used for indexing breaks similarity search.
    persist_directory = "chroma_db"
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    vectorstore = Chroma(persist_directory=persist_directory,
                         embedding_function=embeddings,
                         collection_name="my_collection")
    return vectorstore


@st.cache_resource
def setup_vector():
    # Read the chunk data from the Excel file
    df = pd.read_excel("chunk_metadata_template.xlsx")

    # Build a list of Documents carrying per-chunk metadata
    chunks = []
    for _, row in df.iterrows():
        chunk_with_metadata = Document(
            page_content=row['page_content'],
            metadata={
                'chunk_id': row['chunk_id'],
                'document_title': row['document_title'],
                'topic': row['topic'],
                'access': row['access']
            }
        )
        chunks.append(chunk_with_metadata)

    # Initialize the embedding model
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    # Create the vectorstore (or write into the existing one). Note that
    # from_documents() re-inserts every chunk each time this cached resource
    # is rebuilt, so the persisted collection accumulates duplicates across
    # restarts; see the maintenance sketch at the end of the file.
    persist_directory = "chroma_db"
    collection_name = "my_collection"
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
        collection_name=collection_name
    )

    # Flush to disk so the data is persisted
    vectorstore.persist()
    return vectorstore


# Initialize components
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
chroma_client, chroma_collection = init_chroma()
vectorstore = setup_vector()

# Initialize memory buffer
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


def rag_query(query):
    # Retrieve the relevant documents
    retrieved_docs = vectorstore.similarity_search(query, k=5)
    context = "\n".join([doc.page_content for doc in retrieved_docs]) if retrieved_docs else ""

    # Load past interactions
    past_interactions = memory.load_memory_variables({})[memory.memory_key]
    context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"

    # Build the prompt
    messages = [
        {
            "role": "user",
            "content": f"""You are a consultant advising clients on insurance products from MB Ageas Life in Vietnam. Please respond professionally and accurately, and suggest suitable products by asking a few questions about the customer's needs. All information provided must remain within the scope of MBAL. Invite the customer to register for a more detailed consultation at https://www.mbageas.life/
{context_with_memory}

Question: {query}
Answer:"""
        }
    ]
    response_content = client.chat_completion(messages=messages, max_tokens=1024, stream=False)
    response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
    return response
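
# Illustrative sketch, not called anywhere in this app: the `access` field
# stored in the chunk metadata could be used to restrict retrieval per user.
# The `filter` argument is LangChain's Chroma metadata filter; the "public"
# default value is a hypothetical example, not taken from the spreadsheet.
def rag_query_filtered(query, access="public"):
    docs = vectorstore.similarity_search(query, k=5, filter={"access": access})
    return "\n".join(doc.page_content for doc in docs)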
def process_feedback(query, response, feedback):
    # (A sketch of how this could be wired up follows the interface code below.)
    if feedback:
        # Thumbs up: keep the response in the memory buffer
        memory.chat_memory.add_ai_message(response)
    else:
        # Thumbs down: regenerate the response and store the new one instead.
        # "Tạo câu trả lời đúng với câu hỏi" = "Generate an answer that
        # matches the question".
        new_query = f"{query}. Tạo câu trả lời đúng với câu hỏi"
        new_response = rag_query(new_query)
        st.markdown(new_response)
        memory.chat_memory.add_ai_message(new_response)


# Streamlit interface (user-facing strings are in Vietnamese)
st.title("Chào mừng bạn đã đến với MBAL Chatbot")  # "Welcome to the MBAL Chatbot"
st.markdown("***")
st.info('''
Tôi sẽ giải đáp các thắc mắc của bạn liên quan đến các sản phẩm bảo hiểm nhân thọ của MB Ageas Life''')  # "I will answer your questions about MB Ageas Life's life insurance products"

col1, col2 = st.columns(2)
with col1:
    chat = st.button("Chat")
    if chat:
        st.switch_page("pages/chatbot.py")
with col2:
    rag = st.button("Store Document")
    if rag:
        st.switch_page("pages/management.py")
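
# A minimal sketch of how process_feedback() might be invoked from
# pages/chatbot.py (that page is not shown here; the two-button layout and
# the variable names are assumptions for illustration, not part of this app):
#
#     answer = rag_query(user_question)
#     st.markdown(answer)
#     up, down = st.columns(2)
#     if up.button("👍"):
#         process_feedback(user_question, answer, True)
#     if down.button("👎"):
#         process_feedback(user_question, answer, False)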
", unsafe_allow_html=True)