Spaces:

ak0601
/

Precollege_bot

Sleeping

App Files Files Community

ak0601 committed on Mar 5

Commit

c111348

verified ·

1 Parent(s): 664be02

Upload 3 files

Browse files

Files changed (3) hide show

app.py +242 -0
documents.pkl +3 -0
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,242 @@

+import streamlit as st
+from langchain.memory import ConversationBufferMemory
+from llama_index.core.indices.query.schema import QueryBundle
+from llama_index.core import Document, VectorStoreIndex
+from llama_index.core.text_splitter import SentenceSplitter
+from llama_index.core.retrievers import QueryFusionRetriever
+from llama_index.retrievers.bm25 import BM25Retriever
+from llama_index.core.postprocessor import SentenceTransformerRerank
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.embeddings.gemini import GeminiEmbedding
+from llama_index.llms.gemini import Gemini
+from llama_index.core import Settings
+from llama_index.vector_stores.faiss import FaissVectorStore
+from llama_index.core import (
+    SimpleDirectoryReader,
+    load_index_from_storage,
+    VectorStoreIndex,
+    StorageContext,
+)
+from llama_index.core.node_parser import SemanticSplitterNodeParser
+import os
+import faiss
+import pickle
+import spacy
+# Load NLP model
+# nlp = spacy.load("en_core_web_sm")
+# Set API Key
+GOOGLE_API_KEY = "AIzaSyDRTL3H6EmqCMhsuD3nla5ZkNiwQDyuYbk"
+os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
+# Function to load documents
+def load_documents(filename="documents.pkl"):
+    with open(filename, "rb") as file:
+        return pickle.load(file)
+# Load stored documents
+# Uses the default path, i.e. "documents.pkl" resolved against the working directory.
+loaded_docs = load_documents()
+# Function to split text into sentences
+# def spacy_sentence_splitter(text):
+#     doc = nlp(text)
+#     return [sent.text for sent in doc.sents]
+# Embedding model; the same instance is passed to the splitter here and to the
+# index construction further down.
+embed_model = GeminiEmbedding(model_name="models/embedding-001", use_async=False)
+# Embedding-based node parser.
+# NOTE(review): buffer_size / breakpoint_percentile_threshold presumably
+# control how sentences are grouped and where chunks break — confirm against
+# the SemanticSplitterNodeParser documentation before tuning.
+splitter = SemanticSplitterNodeParser(
+    buffer_size=5, breakpoint_percentile_threshold=95, embed_model=embed_model
+)
+# splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50, separator="\n")
+# Split every loaded document into nodes ...
+nodes = splitter.get_nodes_from_documents([doc for doc in loaded_docs])
+# ... and wrap each node back into a Document that keeps the node's text and metadata.
+chunked_documents = [Document(text=node.text, metadata=node.metadata) for node in nodes]
+# Process documents
+# chunked_documents = [
+#     Document(text=chunk_text, metadata=doc.metadata)
+#     for doc in loaded_docs for chunk_text in spacy_sentence_splitter(doc.text)
+# ]
+# Configure LLM and embeddings
+# The LLM is registered globally on Settings and is also passed explicitly to
+# the query engine below.
+Settings.llm = Gemini(model="models/gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.5)
+# NOTE(review): 768 is presumably the vector size produced by
+# "models/embedding-001" — confirm; the FAISS index is created with this size.
+dimension = 768
+faiss_index = faiss.IndexFlatL2(dimension)
+vector_store = FaissVectorStore(faiss_index=faiss_index)
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
+# Build index
+# NOTE(review): this is module-level code, so it runs every time the script is
+# executed; if the host re-runs the script per interaction, the documents are
+# re-embedded each time — consider caching.
+index = VectorStoreIndex.from_documents(
+    documents=chunked_documents,
+    storage_context=storage_context,
+    embed_model=embed_model,
+    show_progress=True
+)
+# Persist the index; no directory is given, so the library default is used.
+index.storage_context.persist()
+# Initialize memory
+memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+def get_chat_history():
+    return memory.load_memory_variables({})["chat_history"]
+# Define chatbot prompt template
+prompt_template = PromptTemplate(
+    """You are a friendly college counselor with expertise in Indian technical institutes.
+    Previous conversation context (if any):\n{chat_history}\n\n
+    Available college information:\n{context_str}\n\n"
+    User query: {query_str}\n\n
+    Instructions:\n
+    1. Provide a brief, direct answer using only the information available above\n
+    2. If specific data is not available, clearly state that\n
+    3. Keep responses under 3 sentences when possible\n
+    4. If comparing colleges, use bullet points for clarity\n
+    5. Use a friendly, conversational tone\n
+    6. Always be interactive and ask follow-up questions\n
+    7. Always try to give answers in points each point should focus on single aspect of the response.\n
+    8. Always try to give conclusion of your answer in the end for the user to take a decision.\n
+    Response:"""
+)
+# Configure retrieval and query engine
+# Two retrievers are built over the same index: an embedding-based one and a
+# BM25 one, each asked for 10 results.
+vector_retriever = index.as_retriever(similarity_top_k=10)
+bm25_retriever = BM25Retriever.from_defaults(index=index, similarity_top_k=10)
+# Combine both retrievers.
+# NOTE(review): num_queries=10 presumably makes the fusion retriever generate
+# additional query variants with the LLM for every user question — confirm
+# the cost and latency are intended.
+hybrid_retriever = QueryFusionRetriever(
+    [vector_retriever, bm25_retriever],
+    similarity_top_k=10,
+    num_queries=10,
+    mode="reciprocal_rerank",
+    use_async=False
+)
+# Cross-encoder reranker applied as a node postprocessor; top_n is 10.
+reranker = SentenceTransformerRerank(
+    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
+    top_n=10,
+)
+# NOTE(review): verify that `prompt_template=` and `verbose=` are parameters
+# RetrieverQueryEngine.from_args actually recognises; if they are swallowed as
+# extra keyword arguments, the custom prompt defined above is never used
+# (the documented parameter for a QA prompt is `text_qa_template`).
+query_engine = RetrieverQueryEngine.from_args(
+    retriever=hybrid_retriever,
+    node_postprocessors=[reranker],
+    llm=Settings.llm,
+    verbose=True,
+    prompt_template=prompt_template,
+    use_async=False,
+)
+# Streamlit UI
+# Page title and a one-line introduction.
+st.title("📚 Precollege Chatbot")
+st.write("Ask me anything about different colleges and their courses!")
+# Custom CSS for WhatsApp-like interface
+# The stylesheet is injected as raw HTML (unsafe_allow_html=True). It defines
+# the dark background, the .user-message / .ai-message bubbles used when the
+# chat history is rendered, the rounded text input, the round submit button,
+# and hides the element with data-testid="stToolbar".
+st.markdown("""
+<style>
+body {
+    background-color: #111b21;
+    color: #e9edef;
+}
+.stApp {
+    background-color: #111b21;
+}
+.chat-container {
+    padding: 10px;
+    color: #111b21;
+}
+.user-message {
+    background-color: #005c4b;
+    color: #e9edef;
+    padding: 10px 15px;
+    border-radius: 15px;
+    margin: 5px 0;
+    max-width: 70%;
+    margin-left: auto;
+    margin-right: 10px;
+}
+.ai-message {
+    background-color: #1f2c33;
+    color: #e9edef;
+    padding: 10px 15px;
+    border-radius: 15px;
+    margin: 5px 0;
+    max-width: 70%;
+    margin-right: auto;
+    margin-left: 10px;
+    box-shadow: 0 1px 2px rgba(255,255,255,0.1);
+}
+.message-container {
+    display: flex;
+    margin-bottom: 10px;
+}
+.stTextInput input {
+    border-radius: 20px;
+    padding: 10px 20px;
+    border: 1px solid #ccc;
+    background-color: #2a3942;
+    color: #e9edef;
+}
+.stButton button {
+    border-radius: 50%;  /* Make it circular */
+    width: 40px;
+    height: 40px;
+    padding: 0px;
+    background-color: #005c4b;
+    color: #e9edef;
+    font-size: 20px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    border: none;
+    cursor: pointer;
+}
+.stButton button:hover {
+    background-color: #00735e;
+}
+div[data-testid="stToolbar"] {
+    display: none;
+}
+.stMarkdown {
+    color: #e9edef;
+}
+header {
+    background-color: #202c33 !important;
+}
+</style>
+""", unsafe_allow_html=True)
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# Create a container for chat messages
+chat_container = st.container()
+# Create a form for input
+with st.form(key="message_form", clear_on_submit=True):
+    col1, col2 = st.columns([5,1])
+    with col1:
+        user_input = st.text_input("", placeholder="Type a message...", label_visibility="collapsed")
+    with col2:
+        submit_button = st.form_submit_button("➤")
+    if submit_button and user_input.strip():
+        chat_history = get_chat_history()
+        query_bundle = QueryBundle(query_str=f"{chat_history}\n\nUser: {user_input}")
+        response_obj = query_engine.query(query_bundle)
+        response_text = str(response_obj.response) if hasattr(response_obj, "response") else str(response_obj)
+        memory.save_context({"query_str": user_input}, {"response": response_text})
+        st.session_state.chat_history.append(("You", user_input))
+        st.session_state.chat_history.append(("AI", response_text))
+# Display chat history with custom styling
+with chat_container:
+    for role, message in st.session_state.chat_history:
+        message = message.replace("</div>", "").replace("<div>", "")  # Sanitize the message
+        if role == "You":
+            st.markdown(
+                f'<div class="message-container"><div class="user-message">{message}</div></div>',
+                unsafe_allow_html=True
+            )
+        else:
+            st.markdown(
+                f'<div class="message-container"><div class="ai-message">{message}</div></div>',
+                unsafe_allow_html=True
+            )

documents.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9499142a48b2d6bf8883ab59d27c7ba8465c6bbbf7eef6a4396aa1496d034589
+size 25305

requirements.txt ADDED Viewed

Binary file (8.03 kB). View file