ak0601 committed on
Commit
c111348
·
verified ·
1 Parent(s): 664be02

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +242 -0
  2. documents.pkl +3 -0
  3. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.memory import ConversationBufferMemory
3
+ from llama_index.core.indices.query.schema import QueryBundle
4
+ from llama_index.core import Document, VectorStoreIndex
5
+ from llama_index.core.text_splitter import SentenceSplitter
6
+ from llama_index.core.retrievers import QueryFusionRetriever
7
+ from llama_index.retrievers.bm25 import BM25Retriever
8
+ from llama_index.core.postprocessor import SentenceTransformerRerank
9
+ from llama_index.core.prompts import PromptTemplate
10
+ from llama_index.core.query_engine import RetrieverQueryEngine
11
+ from llama_index.embeddings.gemini import GeminiEmbedding
12
+ from llama_index.llms.gemini import Gemini
13
+ from llama_index.core import Settings
14
+ from llama_index.vector_stores.faiss import FaissVectorStore
15
+ from llama_index.core import (
16
+ SimpleDirectoryReader,
17
+ load_index_from_storage,
18
+ VectorStoreIndex,
19
+ StorageContext,
20
+ )
21
+ from llama_index.core.node_parser import SemanticSplitterNodeParser
22
+
23
+ import os
24
+ import faiss
25
+ import pickle
26
+ import spacy
27
+
28
+ # Load NLP model
29
+ # nlp = spacy.load("en_core_web_sm")
30
+
31
# Set API key.
# The key is read from the environment (for example a Space secret or a local
# shell export) rather than being committed to source control. Any key that
# was ever committed to this file must be treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it (or add it as a secret) "
        "before starting the app."
    )
# Write the normalized value back for libraries that read it from the environment.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
34
+
35
# Function to load documents
def load_documents(filename="documents.pkl"):
    """Read and return the pickled object stored in *filename*.

    Args:
        filename: Path of the pickle file to read. Defaults to
            ``"documents.pkl"`` in the current working directory.

    Returns:
        Whatever object was pickled into the file.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so this must only be pointed at trusted files.
    """
    with open(filename, "rb") as pickle_file:
        documents = pickle.load(pickle_file)
    return documents
39
+
40
# Embedding model shared by the semantic splitter and the vector index.
embed_model = GeminiEmbedding(model_name="models/embedding-001", use_async=False)

# Configure LLM and embeddings
Settings.llm = Gemini(model="models/gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.5)

# Vector width handed to the FAISS index; presumably the output size of the
# embedding model above -- confirm if the embedding model is ever changed.
dimension = 768


@st.cache_resource
def _build_index():
    """Load, chunk, embed and index the stored documents once per process.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, each chat message would reload the pickle,
    re-run the semantic splitter and re-embed every chunk before answering.
    ``st.cache_resource`` keeps the built objects alive across reruns.

    Returns:
        tuple: ``(loaded_docs, chunked_documents, index)`` where
        ``loaded_docs`` is the object returned by ``load_documents()``,
        ``chunked_documents`` is the list of ``Document`` chunks produced by
        the semantic splitter, and ``index`` is the FAISS-backed
        ``VectorStoreIndex`` built from those chunks.
    """
    # Load stored documents
    source_docs = load_documents()

    # Split each document into semantically coherent chunks.
    splitter = SemanticSplitterNodeParser(
        buffer_size=5, breakpoint_percentile_threshold=95, embed_model=embed_model
    )
    nodes = splitter.get_nodes_from_documents(list(source_docs))
    chunks = [Document(text=node.text, metadata=node.metadata) for node in nodes]

    # Back the index with a flat L2 FAISS store.
    faiss_index = faiss.IndexFlatL2(dimension)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Build index
    built_index = VectorStoreIndex.from_documents(
        documents=chunks,
        storage_context=storage_context,
        embed_model=embed_model,
        show_progress=True,
    )
    built_index.storage_context.persist()
    return source_docs, chunks, built_index


loaded_docs, chunked_documents, index = _build_index()
76
+
77
# Initialize memory
# Streamlit re-runs this script on every interaction, so a memory object
# created unconditionally here would start empty on each message. Keeping it
# in st.session_state lets the conversation accumulate for the browser session.
if "memory" not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True
    )
memory = st.session_state.memory
79
+
80
def get_chat_history():
    """Return the ``chat_history`` entry of the conversation memory's variables."""
    memory_variables = memory.load_memory_variables({})
    return memory_variables["chat_history"]
82
+
83
# Define chatbot prompt template
# Placeholders: {chat_history} (prior turns), {context_str} (retrieved text)
# and {query_str} (the user's question). The explicit "\n" escapes are kept
# as written; inside this non-raw string they add extra blank lines.
# A stray double quote that followed {context_str} has been removed so it no
# longer appears in the text sent to the model.
prompt_template = PromptTemplate(
    """You are a friendly college counselor with expertise in Indian technical institutes.
Previous conversation context (if any):\n{chat_history}\n\n
Available college information:\n{context_str}\n\n
User query: {query_str}\n\n
Instructions:\n
1. Provide a brief, direct answer using only the information available above\n
2. If specific data is not available, clearly state that\n
3. Keep responses under 3 sentences when possible\n
4. If comparing colleges, use bullet points for clarity\n
5. Use a friendly, conversational tone\n
6. Always be interactive and ask follow-up questions\n
7. Always try to give answers in points each point should focus on single aspect of the response.\n
8. Always try to give conclusion of your answer in the end for the user to take a decision.\n
Response:"""
)
100
+
101
# Configure retrieval and query engine
# Dense (vector) and sparse (BM25) retrievers over the same index.
vector_retriever = index.as_retriever(similarity_top_k=10)
bm25_retriever = BM25Retriever.from_defaults(index=index, similarity_top_k=10)

# Fuse both result lists with reciprocal-rank fusion.
hybrid_retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=10,
    num_queries=10,
    mode="reciprocal_rerank",
    use_async=False,
)

# Cross-encoder that re-scores the fused candidates.
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
    top_n=10,
)

# The QA prompt must be passed as `text_qa_template`; `from_args` has no
# `prompt_template` parameter, so under that name the custom prompt was
# never used. The template's {chat_history} slot is bound to an empty default
# here so that formatting cannot fail on an unfilled placeholder when the
# synthesizer supplies only the context and the query.
qa_template = prompt_template.partial_format(chat_history="")

query_engine = RetrieverQueryEngine.from_args(
    retriever=hybrid_retriever,
    node_postprocessors=[reranker],
    llm=Settings.llm,
    verbose=True,
    text_qa_template=qa_template,
    use_async=False,
)
125
+
126
# Streamlit UI
# Custom CSS for WhatsApp-like interface: dark page background, right-aligned
# green bubbles for the user, left-aligned grey bubbles for the assistant,
# a rounded text box and a circular send button.
_CHAT_STYLESHEET = """
<style>
body {
background-color: #111b21;
color: #e9edef;
}
.stApp {
background-color: #111b21;
}
.chat-container {
padding: 10px;
color: #111b21;
}
.user-message {
background-color: #005c4b;
color: #e9edef;
padding: 10px 15px;
border-radius: 15px;
margin: 5px 0;
max-width: 70%;
margin-left: auto;
margin-right: 10px;
}
.ai-message {
background-color: #1f2c33;
color: #e9edef;
padding: 10px 15px;
border-radius: 15px;
margin: 5px 0;
max-width: 70%;
margin-right: auto;
margin-left: 10px;
box-shadow: 0 1px 2px rgba(255,255,255,0.1);
}
.message-container {
display: flex;
margin-bottom: 10px;
}
.stTextInput input {
border-radius: 20px;
padding: 10px 20px;
border: 1px solid #ccc;
background-color: #2a3942;
color: #e9edef;
}
.stButton button {
border-radius: 50%; /* Make it circular */
width: 40px;
height: 40px;
padding: 0px;
background-color: #005c4b;
color: #e9edef;
font-size: 20px;
display: flex;
align-items: center;
justify-content: center;
border: none;
cursor: pointer;
}
.stButton button:hover {
background-color: #00735e;
}
div[data-testid="stToolbar"] {
display: none;
}
.stMarkdown {
color: #e9edef;
}
header {
background-color: #202c33 !important;
}
</style>
"""

st.title("📚 Precollege Chatbot")
st.write("Ask me anything about different colleges and their courses!")
st.markdown(_CHAT_STYLESHEET, unsafe_allow_html=True)
204
+
205
# Transcript shown in the UI: a list of (role, text) tuples kept per session.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Create a container for chat messages
# It is created before the form so the messages render above the input box.
chat_container = st.container()

# Create a form for input
# clear_on_submit empties the text box after each message is sent.
with st.form(key="message_form", clear_on_submit=True):
    col1, col2 = st.columns([5, 1])
    with col1:
        # A non-empty label avoids Streamlit's empty-label accessibility
        # warning; label_visibility="collapsed" still hides it from view.
        user_input = st.text_input(
            "Message",
            placeholder="Type a message...",
            label_visibility="collapsed",
        )
    with col2:
        submit_button = st.form_submit_button("➤")
219
# Answer the submitted message and record the exchange.
if submit_button and user_input.strip():
    chat_history = get_chat_history()

    # Turn the stored history into a plain "role: text" transcript.
    # Interpolating the history object directly would put its Python repr
    # (a literal "[]" when there is no history yet) into the retrieval query.
    if isinstance(chat_history, str):
        transcript = chat_history.strip()
    else:
        transcript = "\n".join(
            f"{getattr(msg, 'type', 'message')}: {getattr(msg, 'content', msg)}"
            for msg in chat_history
        )

    # Only prepend the transcript when there is one; a first message is sent as-is.
    query_text = f"{transcript}\n\nUser: {user_input}" if transcript else user_input
    query_bundle = QueryBundle(query_str=query_text)
    response_obj = query_engine.query(query_bundle)
    response_text = str(response_obj.response) if hasattr(response_obj, "response") else str(response_obj)

    # Record the turn both in the conversation memory and in the UI transcript.
    memory.save_context({"query_str": user_input}, {"response": response_text})
    st.session_state.chat_history.append(("You", user_input))
    st.session_state.chat_history.append(("AI", response_text))
228
+
229
# Display chat history with custom styling
def _bubble_html(role, message):
    """Return the HTML for one chat bubble with the message text escaped.

    Args:
        role: ``"You"`` for the user's bubble; any other value is rendered
            with the assistant's styling.
        message: Raw message text. It is HTML-escaped, so markup typed by the
            user or produced by the model is displayed literally instead of
            being injected into the page.

    Returns:
        str: A ``message-container`` div wrapping the styled bubble.
    """
    import html  # function-scope import keeps this block self-contained

    bubble_class = "user-message" if role == "You" else "ai-message"
    # Newlines become <br> so multi-line answers keep their line structure
    # inside a single bubble.
    safe_text = html.escape(str(message)).replace("\n", "<br>")
    return f'<div class="message-container"><div class="{bubble_class}">{safe_text}</div></div>'


with chat_container:
    for role, message in st.session_state.chat_history:
        st.markdown(_bubble_html(role, message), unsafe_allow_html=True)
documents.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9499142a48b2d6bf8883ab59d27c7ba8465c6bbbf7eef6a4396aa1496d034589
3
+ size 25305
requirements.txt ADDED
Binary file (8.03 kB). View file