import os
import subprocess

import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import Document, StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Chroma

# Model configuration
# Note: earlier candidates were "mistralai/Mixtral-8x7B-Instruct-v0.1" and
# "google/gemma-2-2b"; load_model() below currently uses "google/gemma-2b-it".
TOKEN = os.getenv('HF_TOKEN')
# Log in to the Hugging Face Hub (assumes the huggingface-cli tool is installed)
subprocess.run(["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"])

# Expose the key as an environment variable for downstream libraries
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
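
# load_model() references nf4_config, which is not defined anywhere in this
# file. A minimal sketch of a 4-bit NF4 quantization config, assuming the
# bitsandbytes package is installed:
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load weights in 4-bit precision
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.bfloat16,  # run matmuls in bfloat16
)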


# Load the LLM (model + tokenizer) and wrap it for LangChain
@st.cache_resource
def load_model():
    # MODEL_NAME = "lmsys/vicuna-7b-v1.5"
    MODEL_NAME = "google/gemma-2b-it"

    # A bitsandbytes-quantized model cannot be moved with .to("cuda");
    # device placement is handled by device_map instead.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=nf4_config,  # 4-bit NF4 config defined above
        torch_dtype=torch.bfloat16,      # save memory using bfloat16
        # low_cpu_mem_usage=True,
        device_map="auto",               # allocate to GPU when available
        token=TOKEN,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model_pipeline = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=1024,  # cap on generated output tokens
    )

    return HuggingFacePipeline(pipeline=model_pipeline)

# Initialize embeddings
@st.cache_resource
def load_embeddings():
    # Vietnamese sentence embeddings (bkai-foundation-models/vietnamese-bi-encoder)
    embeddings = HuggingFaceEmbeddings(model_name='bkai-foundation-models/vietnamese-bi-encoder')
    # embeddings = OpenAIEmbeddings()
    return embeddings

# Chroma vector store built from pre-chunked documents in an Excel file
@st.cache_resource
def setup_vector():
    chunks = []
    df = pd.read_excel(r"chunk_metadata_template.xlsx")
    for _, row in df.iterrows():
        chunk_with_metadata = Document(
            page_content=row['page_content'],
            metadata={
                'chunk_id': row['chunk_id'],
                'document_title': row['document_title'],
            }
        )
        chunks.append(chunk_with_metadata)
    return Chroma.from_documents(chunks, embedding=load_embeddings())

# Set up the retrieval-augmented conversation chain
def setup_conversation_chain():
    llm = load_model()
    vector = setup_vector()
    # Note: memory is not wired into the LCEL chain below; chat history is
    # tracked in st.session_state instead.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # System prompt (Vietnamese). Roughly: "You are a consultant advising
    # customers on MB Ageas Life insurance products in Vietnam. Answer
    # professionally and accurately; provide information first, then ask the
    # next question. All information provided stays within the scope of MBAL.
    # Only once enough customer information has been gathered, invite the
    # customer to register for advice at https://www.mbageas.life/"
    template = """Bạn là một chuyên viên tư vấn cho khách hàng về sản phẩm bảo hiểm của công ty MB Ageas Life tại Việt Nam.
    Hãy trả lời chuyên nghiệp, chính xác, cung cấp thông tin trước rồi hỏi câu tiếp theo. Tất cả các thông tin cung cấp đều trong phạm vi MBAL. Khi có đủ thông tin khách hàng thì mới mời khách hàng đăng ký để nhận tư vấn trên https://www.mbageas.life/
        {context}
        Câu hỏi: {question}
        Trả lời:"""

    prompt = PromptTemplate.from_template(template)
    parser = StrOutputParser()

    # Alternative: a memory-backed ConversationalRetrievalChain
    # PROMPT = ChatPromptTemplate.from_template(template=template)
    # chain = ConversationalRetrievalChain.from_llm(
    #     llm=llm,
    #     retriever=vector.as_retriever(search_kwargs={'k': 5}),
    #     memory=memory,
    #     combine_docs_chain_kwargs={"prompt": PROMPT},
    #     # condense_question_prompt=CUSTOM_QUESTION_PROMPT
    # )

    chain = (
        {"context": vector.as_retriever(search_kwargs={'k': 5}) | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | parser
    )

    return chain
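
# Example of invoking the chain directly (the question string is hypothetical):
# chain = setup_conversation_chain()
# answer = chain.invoke("Quyền lợi của sản phẩm bảo hiểm nhân thọ MBAL là gì?")
# # "What are the benefits of MBAL life insurance products?"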

# Streamlit
def main():
    st.title("🛡️ MBAL Chatbot 🛡️")

    # Initialize the conversation chain
    if 'conversation_chain' not in st.session_state:
        st.session_state.conversation_chain = setup_conversation_chain()

    # Display prior chat messages
    if 'messages' not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # User input field (placeholder, Vietnamese: "What do you need advice on?
    # Share your needs and your information!")
    if prompt := st.chat_input("Bạn cần tư vấn về điều gì? Hãy chia sẻ nhu cầu và thông tin của bạn nhé!"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            # Generate the response: the LCEL chain takes the question string
            # and, via StrOutputParser, returns the answer as a plain string
            full_response = st.session_state.conversation_chain.invoke(prompt)
            
            message_placeholder.markdown(full_response)
        
        st.session_state.messages.append({"role": "assistant", "content": full_response})

if __name__ == "__main__":
    main()
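
# Run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py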