Spaces:

ak0601
/

Gemini_Chatbot

Sleeping

App Files Files Community

ak0601 committed on Dec 2, 2024

Commit

ececcd6

verified ·

1 Parent(s): a5f6193

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -168

app.py CHANGED Viewed

@@ -1,168 +1,171 @@
-import os
-import streamlit as st
-import google.generativeai as genai
-# from langchain_openai import OpenAI /
-from langchain_openai import OpenAIEmbeddings
-from langchain_google_genai import GoogleGenerativeAIEmbeddings
-from langchain_google_genai import ChatGoogleGenerativeAI
-# from langchain_openai import OpenAIEmbeddings
-from langchain_community.document_loaders import Docx2txtLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
-from dotenv import load_dotenv
-from langchain.embeddings import HuggingFaceEmbeddings
-import pysqlite3
-import sys
-sys.modules['sqlite3'] = pysqlite3
-import os
-os.environ["TRANSFORMERS_OFFLINE"] = "1"
-# Retrieve OpenAI API key from the .env file
-GOOGLE_API_KEY = "AIzaSyC1-QUzA45IlCosX__sKlzNAgVZGEaHc0c"
-# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-if not GOOGLE_API_KEY:
-    raise ValueError("Gemini API key not found. Please set it in the .env file.")
-# Set OpenAI API key
-os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
-# os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
-# Streamlit app configuration
-st.set_page_config(page_title="College Data Chatbot", layout="centered")
-st.title("PreCollege Chatbot GEMINI+ HuggingFace Embeddings")
-# Initialize OpenAI LLM
-llm = ChatGoogleGenerativeAI(
-    model="gemini-1.5-pro-latest",
-    temperature=0.2,  # Slightly higher for varied responses
-    max_tokens=None,
-    timeout=None,
-    max_retries=2,
-)
-# Initialize embeddings using OpenAI
-embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
-def load_preprocessed_vectorstore():
-    try:
-        loader = Docx2txtLoader("./Updated_structred_aman.docx")
-        documents = loader.load()
-        text_splitter = RecursiveCharacterTextSplitter(
-            separators=["\n\n", "\n", ". ", " ", ""],
-            chunk_size=3000,
-            chunk_overlap=1000)
-        document_chunks = text_splitter.split_documents(documents)
-        vector_store = Chroma.from_documents(
-            embedding=embeddings,
-            documents=document_chunks,
-            persist_directory="./data32"
-        )
-        return vector_store
-    except Exception as e:
-        st.error(f"Error creating vector store: {e}")
-        return None
-def get_context_retriever_chain(vector_store):
-    """Creates a history-aware retriever chain."""
-    retriever = vector_store.as_retriever()
-    # Define the prompt for the retriever chain
-    prompt = ChatPromptTemplate.from_messages([
-        MessagesPlaceholder(variable_name="chat_history"),
-        ("human", "{input}"),
-        ("system", """Given the chat history and the latest user question, which might reference context in the chat history,
-formulate a standalone question that can be understood without the chat history.
-If the question is directly addressed within the provided document, provide a relevant answer.
-If the question is not explicitly addressed in the document, return the following message:
-'This question is beyond the scope of the available information. Please contact your mentor for further assistance.'
-Do NOT answer the question directly, just reformulate it if needed and otherwise return it as is.""")
-    ])
-    retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
-    return retriever_chain
-def get_conversational_chain(retriever_chain):
-    """Creates a conversational chain using the retriever chain."""
-    prompt = ChatPromptTemplate.from_messages([
-        ("system", """Hello! I'm your PreCollege AI assistant, here to help you with your JEE Mains journey.
-Please provide your JEE Mains rank and preferred engineering branches or colleges,
-and I'll give you tailored advice based on our verified database.
-Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
-"""
-         "\n\n"
-         "{context}"),
-        MessagesPlaceholder(variable_name="chat_history"),
-        ("human", "{input}")
-    ])
-    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
-    return create_retrieval_chain(retriever_chain, stuff_documents_chain)
-def get_response(user_query):
-    retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
-    conversation_rag_chain = get_conversational_chain(retriever_chain)
-    formatted_chat_history = []
-    for message in st.session_state.chat_history:
-        if isinstance(message, HumanMessage):
-            formatted_chat_history.append({"author": "user", "content": message.content})
-        elif isinstance(message, SystemMessage):
-            formatted_chat_history.append({"author": "assistant", "content": message.content})
-    response = conversation_rag_chain.invoke({
-        "chat_history": formatted_chat_history,
-        "input": user_query
-    })
-    return response['answer']
-# Load the preprocessed vector store from the local directory
-st.session_state.vector_store = load_preprocessed_vectorstore()
-# Initialize chat history if not present
-if "chat_history" not in st.session_state:
-    st.session_state.chat_history = [
-        {"author": "assistant", "content": "Hello, I am Precollege. How can I help you?"}
-    ]
-# Main app logic
-if st.session_state.get("vector_store") is None:
-    st.error("Failed to load preprocessed data. Please ensure the data exists in './data' directory.")
-else:
-    # Display chat history
-    with st.container():
-        for message in st.session_state.chat_history:
-            if message["author"] == "assistant":
-                with st.chat_message("system"):
-                    st.write(message["content"])
-            elif message["author"] == "user":
-                with st.chat_message("human"):
-                    st.write(message["content"])
-    # Add user input box below the chat
-    with st.container():
-        with st.form(key="chat_form", clear_on_submit=True):
-            user_query = st.text_input("Type your message here...", key="user_input")
-            submit_button = st.form_submit_button("Send")
-        if submit_button and user_query:
-            # Get bot response
-            response = get_response(user_query)
-            st.session_state.chat_history.append({"author": "user", "content": user_query})
-            st.session_state.chat_history.append({"author": "assistant", "content": response})
-            # Rerun the app to refresh the chat display
-            st.rerun()
-""""""

+import os
+import streamlit as st
+import google.generativeai as genai
+# from langchain_openai import OpenAI /
+from langchain_openai import OpenAIEmbeddings
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from langchain_google_genai import ChatGoogleGenerativeAI
+# from langchain_openai import OpenAIEmbeddings
+from langchain_community.document_loaders import Docx2txtLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Chroma
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from dotenv import load_dotenv
+from langchain.embeddings import HuggingFaceEmbeddings
+from sentence_transformers import SentenceTransformer
+import pysqlite3
+import sys
+sys.modules['sqlite3'] = pysqlite3
+import os
+os.environ["TRANSFORMERS_OFFLINE"] = "1"
+# Load the Gemini API key from the environment (or a local .env file).
+# SECURITY: never hard-code the key in source control. A key that has already
+# been committed must be treated as leaked and rotated.
+load_dotenv()
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+if not GOOGLE_API_KEY:
+    raise ValueError("Gemini API key not found. Please set it in the .env file.")
+# Re-export the key so it is visible through os.environ for the rest of the app
+# (presumably where the Google Generative AI client looks it up -- see its docs).
+os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
+# Streamlit page setup: browser-tab title, centered layout, on-page heading.
+st.set_page_config(layout="centered", page_title="College Data Chatbot")
+st.title("PreCollege Chatbot GEMINI+ HuggingFace Embeddings")
+# Gemini chat model used by both the retriever chain and the answering chain.
+_llm_settings = {
+    "model": "gemini-1.5-pro-latest",
+    "temperature": 0.2,  # low, but non-zero so answers can vary slightly
+    "max_tokens": None,
+    "timeout": None,
+    "max_retries": 2,
+}
+llm = ChatGoogleGenerativeAI(**_llm_settings)
+# Sentence-embedding model used to index and query the Chroma store.
+# The vector store is handed this object as its embedding function, which must
+# be a LangChain ``Embeddings`` implementation (``embed_documents`` /
+# ``embed_query``). A raw ``SentenceTransformer`` only exposes ``encode``, so the
+# model is loaded through the HuggingFaceEmbeddings wrapper instead.
+embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
+def load_preprocessed_vectorstore():
+    """Open the persisted Chroma store, indexing the source document if needed.
+
+    The store lives in ``./data32``. When it already contains entries it is
+    returned as-is; otherwise ``./Updated_structred_aman.docx`` is loaded, split
+    into overlapping chunks and embedded into the store.
+
+    Returns:
+        The Chroma vector store, or ``None`` if opening or indexing failed (the
+        error is reported in the Streamlit UI).
+    """
+    try:
+        vector_store = Chroma(
+            embedding_function=embeddings,
+            persist_directory="./data32",
+        )
+        # Reuse an already-populated index. Calling Chroma.from_documents on
+        # every run would re-embed the whole file and append duplicate chunks
+        # to the persisted collection.
+        if vector_store.get(limit=1)["ids"]:
+            return vector_store
+        documents = Docx2txtLoader("./Updated_structred_aman.docx").load()
+        text_splitter = RecursiveCharacterTextSplitter(
+            separators=["\n\n", "\n", ". ", " ", ""],
+            chunk_size=3000,
+            chunk_overlap=1000,
+        )
+        vector_store.add_documents(text_splitter.split_documents(documents))
+        return vector_store
+    except Exception as e:  # UI boundary: report the failure instead of crashing
+        st.error(f"Error creating vector store: {e}")
+        return None
+def get_context_retriever_chain(vector_store):
+    """Create a history-aware retriever on top of ``vector_store``.
+
+    The prompt asks the LLM to rewrite the latest user message as a standalone
+    question; ``create_history_aware_retriever`` then uses that rewritten text
+    as the search query. The prompt therefore only asks for a reformulation:
+    telling the model to answer, or to emit an "out of scope" message, at this
+    stage would turn that text into the search query.
+
+    Args:
+        vector_store: Vector store exposing ``as_retriever()``.
+
+    Returns:
+        The retriever chain built by ``create_history_aware_retriever``.
+    """
+    retriever = vector_store.as_retriever()
+    # System instruction first, then the prior turns, then the new question.
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", """Given the chat history and the latest user question, which might reference context in the chat history,
+formulate a standalone question that can be understood without the chat history.
+Do NOT answer the question, just reformulate it if needed and otherwise return it as is."""),
+        MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{input}"),
+    ])
+    return create_history_aware_retriever(llm, retriever, prompt)
+def get_conversational_chain(retriever_chain):
+    """Build the retrieval-augmented answering chain.
+
+    Args:
+        retriever_chain: Retriever chain that supplies the documents filled
+            into the ``{context}`` slot of the system message.
+
+    Returns:
+        The chain produced by ``create_retrieval_chain``.
+    """
+    # Assistant persona text followed by the retrieved documents ({context}).
+    system_text = (
+        """Hello! I'm your PreCollege AI assistant, here to help you with your JEE Mains journey.
+Please provide your JEE Mains rank and preferred engineering branches or colleges,
+and I'll give you tailored advice based on our verified database.
+Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
+"""
+        "\n\n"
+        "{context}"
+    )
+    answer_prompt = ChatPromptTemplate.from_messages([
+        ("system", system_text),
+        MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{input}"),
+    ])
+    answer_chain = create_stuff_documents_chain(llm, answer_prompt)
+    return create_retrieval_chain(retriever_chain, answer_chain)
+def get_response(user_query):
+    """Answer ``user_query`` with the RAG chain, using the session's chat history.
+
+    ``st.session_state.chat_history`` stores turns as
+    ``{"author": "user" | "assistant", "content": str}`` dicts. They are
+    converted to LangChain message objects here; previously the loop only
+    matched message objects, so the dict entries were skipped and the chain
+    always received an empty history.
+
+    Args:
+        user_query: The text the user just submitted.
+
+    Returns:
+        The ``'answer'`` field of the chain's response.
+    """
+    # Local import: AIMessage is not among the file-level imports.
+    from langchain_core.messages import AIMessage
+
+    retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
+    conversation_rag_chain = get_conversational_chain(retriever_chain)
+    history = []
+    for message in st.session_state.chat_history:
+        if isinstance(message, dict):
+            author = message.get("author")
+            content = message.get("content", "")
+        elif isinstance(message, HumanMessage):
+            author, content = "user", message.content
+        elif isinstance(message, (AIMessage, SystemMessage)):
+            # Message objects are still accepted; as before, a SystemMessage
+            # is treated as an assistant turn.
+            author, content = "assistant", message.content
+        else:
+            continue
+        if author == "user":
+            history.append(HumanMessage(content=content))
+        elif author == "assistant" and history:
+            # Assistant turns before the first user turn (the canned greeting)
+            # are dropped so the history starts with a user message.
+            # NOTE(review): assumed to be what the chat model prefers -- confirm.
+            history.append(AIMessage(content=content))
+    response = conversation_rag_chain.invoke({
+        "chat_history": history,
+        "input": user_query,
+    })
+    return response['answer']
+# Open/build the vector store once per session. Streamlit re-executes this
+# script on every interaction, so an unconditional call would repeat the work
+# on each rerun. A previous failure (None) is retried.
+if st.session_state.get("vector_store") is None:
+    st.session_state.vector_store = load_preprocessed_vectorstore()
+# Seed the conversation with a greeting the first time the session runs.
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = [
+        {"author": "assistant", "content": "Hello, I am Precollege. How can I help you?"}
+    ]
+# Main app logic: show an error if the vector store is unavailable, otherwise
+# render the conversation and the input form.
+if st.session_state.get("vector_store") is None:
+    # The message names './data32', the directory the vector store persists to.
+    st.error("Failed to load preprocessed data. Please ensure the data exists in './data32' directory.")
+else:
+    # Display chat history; entries with an unknown author are skipped.
+    with st.container():
+        for message in st.session_state.chat_history:
+            if message["author"] == "assistant":
+                with st.chat_message("system"):
+                    st.write(message["content"])
+            elif message["author"] == "user":
+                with st.chat_message("human"):
+                    st.write(message["content"])
+    # Input form below the chat; the text box is cleared after each submit.
+    with st.container():
+        with st.form(key="chat_form", clear_on_submit=True):
+            user_query = st.text_input("Type your message here...", key="user_input")
+            submit_button = st.form_submit_button("Send")
+        if submit_button and user_query:
+            # Ask the chain first, then record both turns, so the history
+            # passed to the chain does not yet include the new question.
+            response = get_response(user_query)
+            st.session_state.chat_history.append({"author": "user", "content": user_query})
+            st.session_state.chat_history.append({"author": "assistant", "content": response})
+            # Rerun the app to refresh the chat display
+            st.rerun()
+""""""