todap committed on
Commit
ada8cbb
·
verified ·
1 Parent(s): 40bbd2e

Upload 6 files

Browse files
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Workaround for chromadb on hosted platforms: chromadb requires a newer
# sqlite3 than the system one, so swap in the statically linked
# pysqlite3-binary build before anything imports sqlite3.
try:
    __import__('pysqlite3')
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    # pysqlite3 is not installed (e.g. local development) — fall back to the
    # stdlib sqlite3 instead of crashing at import time.
    pass

import streamlit as st
from huggingface_hub import InferenceClient
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import chromadb
from langchain.memory import ConversationBufferMemory


# Set page config — must be the first Streamlit command executed in the app.
st.set_page_config(page_title="RAG Chatbot", page_icon="🤖", layout="wide")

# Hugging Face API token, read from Streamlit secrets (never hard-coded).
HF_TOKEN = st.secrets["HF_TOKEN"]
30
+
31
# Initialize your models, databases, and other components here
@st.cache_resource
def init_chroma():
    """Open (or create) the persistent Chroma client and its collection.

    Cached by Streamlit so the client is constructed once per server process.

    Returns:
        A ``(client, collection)`` pair backed by the ``chroma_db`` directory.
    """
    store = chromadb.PersistentClient(path="chroma_db")
    collection = store.get_or_create_collection("my_collection")
    return store, collection
38
+
39
@st.cache_resource
def init_vectorstore():
    """Build the LangChain ``Chroma`` wrapper over the persisted collection.

    Cached by Streamlit so the HuggingFace embedding model loads only once.

    Returns:
        A ``Chroma`` vector store bound to ``chroma_db`` / ``my_collection``.
    """
    embedder = HuggingFaceEmbeddings()
    return Chroma(
        persist_directory="chroma_db",
        embedding_function=embedder,
        collection_name="my_collection",
    )
45
+
46
# Initialize components
# Module-level singletons: pages/chatbot.py imports rag_query/process_feedback
# (which close over `client` and `memory`), and pages/management.py imports
# `vectorstore` directly from this module.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
chroma_client, chroma_collection = init_chroma()
vectorstore = init_vectorstore()

# Initialize memory buffer
# NOTE(review): this memory lives at module level, so it appears to be shared
# across every user session of the Streamlit server — confirm that is intended.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
53
+
54
# Minimum word count below which a grounded answer is considered too weak and
# we fall back to the model's own knowledge (original hard-coded value: 35).
_MIN_ANSWER_WORDS = 35


def rag_query(query, k=3):
    """Answer *query* with retrieval-augmented generation.

    Retrieves up to *k* similar chunks from the vector store, prepends them
    (plus the running conversation history) to the prompt, and queries the
    hosted Mistral model. If nothing was retrieved or the grounded answer is
    very short, falls back to asking the model without any context.

    Side effects: appends the user query and the final answer to the shared
    conversation memory.

    Args:
        query: The user's question.
        k: Number of documents to retrieve (default 3, the original value).

    Returns:
        The model's answer as a string.
    """
    # Retrieve relevant documents using similarity search.
    retrieved_docs = vectorstore.similarity_search(query, k=k)
    # Empty string when nothing (or only empty chunks) came back; this also
    # drives the fallback decision below.
    context = "\n".join(doc.page_content for doc in retrieved_docs)

    # Record the user turn first so the loaded history includes it.
    memory.chat_memory.add_user_message(query)
    past_interactions = memory.load_memory_variables({})[memory.memory_key]
    context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"

    # Grounded attempt: context + history + question.
    messages = [
        {"role": "user", "content": f"Context: {context_with_memory}\n\nQuestion: {query},it is not mandatory to use the context\n\nAnswer:"}
    ]
    response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
    # Strip any echoed "Answer:" prefix the model may repeat in its reply.
    response = response_content.choices[0].message.content.split("Answer:")[-1].strip()

    # Fallback: no usable context, or the grounded answer is suspiciously
    # short — re-ask without the RAG scaffolding. (`not context` already
    # covers the empty-retrieval case, so the original's extra
    # `not retrieved_docs` test was redundant.)
    if not context or len(response.split()) < _MIN_ANSWER_WORDS:
        messages = [{"role": "user", "content": query}]
        response_content = client.chat_completion(messages=messages, max_tokens=500, stream=False)
        response = response_content.choices[0].message.content

    # Record the assistant turn.
    memory.chat_memory.add_ai_message(response)

    return response
98
+
99
def process_feedback(query, response, feedback):
    """Handle thumbs-up / thumbs-down feedback on an assistant answer.

    On positive feedback nothing needs to be stored: ``rag_query`` already
    appended the answer to the conversation memory when it was generated
    (the original code added it a second time, duplicating the AI message).

    On negative feedback, re-ask the model with an explicit improvement
    instruction and render the new answer; ``rag_query`` itself records the
    regenerated answer in memory, so no extra bookkeeping is needed here
    either.

    Args:
        query: The user question the rated answer belongs to.
        response: The assistant answer being rated (kept in the signature
            for caller compatibility; unused on the regenerate path).
        feedback: True for thumbs-up, False for thumbs-down.
    """
    if not feedback:
        # Thumbs down: regenerate and display a hopefully better answer.
        new_query = f"{query}. Give better response"
        new_response = rag_query(new_query)
        st.markdown(new_response)
111
+
112
# Streamlit interface: landing page with navigation to the two sub-pages.

st.title("Welcome to our RAG-Based Chatbot")
st.markdown("***")
st.info('''
To use Our Mistral supported Chatbot, click Chat.

To push data, click on Store Document.
''')

# One (button label, target page) pair per column.
nav_targets = [
    ("Chat", "pages/chatbot.py"),
    ("Store Document", "pages/management.py"),
]
for column, (label, page) in zip(st.columns(2), nav_targets):
    with column:
        if st.button(label):
            st.switch_page(page)

st.markdown("<div style='text-align:center;'></div>", unsafe_allow_html=True)
chroma_db/.DS_Store ADDED
Binary file (6.15 kB). View file
 
chroma_db/chroma.sqlite3 ADDED
Binary file (156 kB). View file
 
pages/chatbot.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st

from app import rag_query, process_feedback


st.title("RAG Chatbot")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for i, message in enumerate(st.session_state.messages):
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        if message["role"] == "assistant":
            # The question this answer belongs to is the preceding message.
            # Guard i == 0: the original `messages[i-1]` would wrap to the
            # last message if an assistant entry ever appeared first.
            user_query = st.session_state.messages[i - 1]["content"] if i > 0 else ""
            col1, col2 = st.columns([1, 15])
            with col1:
                if st.button("👍", key=f"thumbs_up_{i}"):
                    process_feedback(user_query, message["content"], True)
            with col2:
                if st.button("👎", key=f"thumbs_down_{i}"):
                    process_feedback(user_query, message["content"], False)

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    response = rag_query(prompt)

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})

    # Rerun so the feedback buttons for the new answer are drawn.
    # st.experimental_rerun() is deprecated; st.rerun() is its replacement.
    st.rerun()

# Sidebar for additional controls
with st.sidebar:
    st.header("Options")
    if st.button("Clear Chat History"):
        st.session_state.messages = []
        st.rerun()
pages/management.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import streamlit as st
from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

from app import vectorstore


st.title("Document Management")

# File uploader. 'docx' was previously offered here but had no matching
# loader below, so every .docx upload ended in "Unsupported file type" —
# only offer the types we can actually process.
uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf'])

# Maps the browser-reported MIME type to the LangChain loader class.
_LOADERS = {
    "application/pdf": PyPDFLoader,
    "text/plain": TextLoader,
}

if uploaded_file is not None:
    # Save the upload to a temporary path so the file-based loaders can read it.
    temp_dir = "temp_uploads"
    os.makedirs(temp_dir, exist_ok=True)
    file_path = os.path.join(temp_dir, uploaded_file.name)

    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    st.success(f"File {uploaded_file.name} successfully uploaded!")

    # Process the uploaded file
    if st.button("Process Document"):
        # Resolve the loader BEFORE entering the try block: st.stop() raises
        # a Streamlit exception, and the original code's `except Exception`
        # swallowed it and reported "An error occurred" instead of stopping.
        loader_cls = _LOADERS.get(uploaded_file.type)
        if loader_cls is None:
            os.remove(file_path)  # don't leak the temp file on this path
            st.error("Unsupported file type.")
            st.stop()

        with st.spinner("Processing document..."):
            try:
                documents = loader_cls(file_path).load()

                # Split the document into chunks
                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                texts = text_splitter.split_documents(documents)

                # Add the chunks to the vectorstore
                vectorstore.add_documents(texts)

                st.success("Document processed and added to the knowledge base!")
            except Exception as e:
                st.error(f"An error occurred: {e}")
            finally:
                # Clean up the temporary file even if processing failed.
                if os.path.exists(file_path):
                    os.remove(file_path)

# TODO: list the documents currently in the knowledge base, and offer a
# confirmation-guarded "clear knowledge base" action (vectorstore.delete()).
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ chromadb==0.5.5
2
+ huggingface_hub==0.23.4
3
+ langchain==0.2.12
4
+ llama_index==0.10.62
5
+ streamlit==1.36.0
6
+ streamlit_extras==0.4.6
7
+ llama-index-vector-stores-chroma==0.1.10
8
+ pysqlite3-binary
9
+ langchain-community==0.2.6
10
+ sentence-transformers