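# Swap the stdlib sqlite3 module for pysqlite3-binary before chromadb is
# imported: chromadb requires SQLite >= 3.35, which the default Streamlit
# Cloud image does not ship.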
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import streamlit as st
import chromadb
import pandas as pd
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
# Set page config
st.set_page_config(page_title="MBAL Chatbot", page_icon="🛡️", layout="wide")
# Read the Hugging Face API token from Streamlit secrets
HF_TOKEN = st.secrets["HF_TOKEN"]
@st.cache_resource
def init_chroma():
    persist_directory = "chroma_db"
    chroma_client = chromadb.PersistentClient(path=persist_directory)
    chroma_collection = chroma_client.get_or_create_collection("my_collection")
    return chroma_client, chroma_collection
@st.cache_resource
def init_vectorstore():
    persist_directory = "chroma_db"
    # Use the same embedding model as setup_vector(); mixing models with
    # different vector dimensions in one collection breaks similarity search
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
    return vectorstore
@st.cache_resource
def setup_vector():
    # Read the chunk data from the Excel file
    df = pd.read_excel("chunk_metadata_template (1).xlsx")
    chunks = []
    # Build a list of Documents carrying per-chunk metadata
    for _, row in df.iterrows():
        chunk_with_metadata = Document(
            page_content=row['page_content'],
            metadata={
                'chunk_id': row['chunk_id'],
                'document_title': row['document_title'],
                'topic': row['topic'],
                'access': row['access']
            }
        )
        chunks.append(chunk_with_metadata)
    # Initialize the embedding model
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    # Create the vectorstore, or write into an existing one
    persist_directory = "chroma_db"
    collection_name = "my_collection"
    # Build the vectorstore from the documents and write it into Chroma
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
        collection_name=collection_name
    )
    # Flush to disk so the data survives restarts (a no-op on newer Chroma
    # versions, which persist writes automatically)
    vectorstore.persist()
    return vectorstore
# Initialize components
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
chroma_client, chroma_collection = init_chroma()
# setup_vector() both ingests the Excel chunks and returns the store, so a
# separate init_vectorstore() call is not needed here
vectorstore = setup_vector()
# Keep the memory buffer in session state so it survives Streamlit reruns;
# a plain module-level instance would be recreated on every interaction
if "memory" not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
memory = st.session_state.memory
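# RAG pipeline: retrieve the top-k chunks from Chroma, prepend the
# conversation history, and ask Mistral-7B-Instruct for a grounded answer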
def rag_query(query):
    # Retrieve the most relevant documents
    retrieved_docs = vectorstore.similarity_search(query, k=5)
    context = "\n".join([doc.page_content for doc in retrieved_docs]) if retrieved_docs else ""
    # Pull past interactions from the memory buffer
    past_interactions = memory.load_memory_variables({})[memory.memory_key]
    context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
    # Assemble the prompt
    messages = [
        {
            "role": "user",
            "content": f"""You are a consultant advising clients on insurance products from MB Ageas Life in Vietnam. Please respond professionally and accurately, and suggest suitable products by asking a few questions about the customer's needs. All information provided must remain within the scope of MBAL. Invite the customer to register for a more detailed consultation at https://www.mbageas.life/
{context_with_memory}
Question: {query}
Answer:"""
        }
    ]
    response_content = client.chat_completion(messages=messages, max_tokens=1024, stream=False)
    response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
    return response
def process_feedback(query, response, feedback):
    if feedback:
        # Thumbs up: commit the response to the memory buffer
        memory.chat_memory.add_ai_message(response)
    else:
        # Thumbs down: regenerate instead of storing the rejected response.
        # The appended Vietnamese instruction translates to
        # "Generate an answer that matches the question"
        new_query = f"{query}. Tạo câu trả lời đúng với câu hỏi"
        new_response = rag_query(new_query)
        st.markdown(new_response)
        memory.chat_memory.add_ai_message(new_response)
# Streamlit interface (UI copy is in Vietnamese: "Welcome to the MBAL Chatbot" /
# "I will answer your questions about MB Ageas Life's life-insurance products")
st.title("Chào mừng bạn đã đến với MBAL Chatbot")
st.markdown("***")
st.info('''
Tôi sẽ giải đáp các thắc mắc của bạn liên quan đến các sản phẩm bảo hiểm nhân thọ của MB Ageas Life''')
col1, col2 = st.columns(2)
with col1:
    chat = st.button("Chat")
    if chat:
        st.switch_page("pages/chatbot.py")
with col2:
    rag = st.button("Store Document")
    if rag:
        st.switch_page("pages/management.py")